diff options
196 files changed, 10226 insertions, 6757 deletions
diff --git a/debian/bin/gencontrol.py b/debian/bin/gencontrol.py index d1dbad26c..28df8a4c9 100755 --- a/debian/bin/gencontrol.py +++ b/debian/bin/gencontrol.py @@ -2,7 +2,6 @@ import sys import locale -import io import os import os.path import subprocess @@ -115,12 +114,9 @@ class Gencontrol(Base): kw_proc = subprocess.Popen( ['kernel-wedge', 'gen-control', vars['abiname']], stdout=subprocess.PIPE, + text=True, env=kw_env) - if not isinstance(kw_proc.stdout, io.IOBase): - udeb_packages = read_control(io.open(kw_proc.stdout.fileno(), - closefd=False)) - else: - udeb_packages = read_control(io.TextIOWrapper(kw_proc.stdout)) + udeb_packages = read_control(kw_proc.stdout) kw_proc.wait() if kw_proc.returncode != 0: raise RuntimeError('kernel-wedge exited with code %d' % @@ -247,16 +243,16 @@ class Gencontrol(Base): self._setup_makeflags(self.arch_makeflags, makeflags, config_base) try: - gnu_type_bytes = subprocess.check_output( + gnu_type = subprocess.check_output( ['dpkg-architecture', '-f', '-a', arch, '-q', 'DEB_HOST_GNU_TYPE'], - stderr=subprocess.DEVNULL) + stderr=subprocess.DEVNULL, + encoding='utf-8') except subprocess.CalledProcessError: # This sometimes happens for the newest ports :-/ print('W: Unable to get GNU type for %s' % arch, file=sys.stderr) else: - vars['gnu-type-package'] = ( - gnu_type_bytes.decode('utf-8').strip().replace('_', '-')) + vars['gnu-type-package'] = gnu_type.strip().replace('_', '-') def do_arch_packages(self, packages, makefile, arch, vars, makeflags, extra): diff --git a/debian/bin/gencontrol_signed.py b/debian/bin/gencontrol_signed.py index fd463c0ed..96a9e19a6 100755 --- a/debian/bin/gencontrol_signed.py +++ b/debian/bin/gencontrol_signed.py @@ -1,8 +1,6 @@ #!/usr/bin/python3 -import codecs import hashlib -import io import json import os.path import re @@ -86,12 +84,9 @@ class Gencontrol(Base): kw_proc = subprocess.Popen( ['kernel-wedge', 'gen-control', vars['abiname']], stdout=subprocess.PIPE, + text=True, env=kw_env) - if not 
isinstance(kw_proc.stdout, io.IOBase): - udeb_packages = read_control(io.open(kw_proc.stdout.fileno(), - closefd=False)) - else: - udeb_packages = read_control(io.TextIOWrapper(kw_proc.stdout)) + udeb_packages = read_control(kw_proc.stdout) kw_proc.wait() if kw_proc.returncode != 0: raise RuntimeError('kernel-wedge exited with code %d' % @@ -281,8 +276,8 @@ class Gencontrol(Base): re.sub(r'\+b(\d+)$', r'.b\1', re.sub(r'-', r'+', vars['imagebinaryversion'])) - with codecs.open(self.template_debian_dir + '/changelog', 'w', - 'utf-8') as f: + with open(self.template_debian_dir + '/changelog', 'w', + encoding='utf-8') as f: f.write(self.substitute('''\ linux-signed@source_suffix@-@arch@ (@signedsourceversion@) @distribution@; urgency=@urgency@ @@ -291,7 +286,8 @@ linux-signed@source_suffix@-@arch@ (@signedsourceversion@) @distribution@; urgen ''', vars)) - with codecs.open('debian/changelog', 'r', 'utf-8') as changelog_in: + with open('debian/changelog', 'r', encoding='utf-8') \ + as changelog_in: # Ignore first two header lines changelog_in.readline() changelog_in.readline() @@ -360,7 +356,7 @@ linux-signed@source_suffix@-@arch@ (@signedsourceversion@) @distribution@; urgen 'files': package_files } - with codecs.open(self.template_top_dir + '/files.json', 'w') as f: + with open(self.template_top_dir + '/files.json', 'w') as f: json.dump(all_files, f) def write_source_lintian_overrides(self): diff --git a/debian/bin/genpatch-rt b/debian/bin/genpatch-rt index e61b5f242..afaf4bed6 100755 --- a/debian/bin/genpatch-rt +++ b/debian/bin/genpatch-rt @@ -1,6 +1,5 @@ #!/usr/bin/python3 -import codecs import io import os import os.path @@ -99,12 +98,12 @@ def main(source, version=None): ['gpgv', '--status-fd', '1', '--keyring', 'debian/upstream/rt-signing-key.pgp', '--ignore-time-conflict', source_sig, '-'], - stdin=unxz_proc.stdout) + stdin=unxz_proc.stdout, + text=True) if unxz_proc.wait() or \ not re.search(r'^\[GNUPG:\]\s+VALIDSIG\s', - codecs.decode(verify_output), - 
re.MULTILINE): - os.write(2, verify_output) # bytes not str! + verify_output, re.MULTILINE): + sys.stderr.write(verify_output) raise RuntimeError("GPG signature verification failed") temp_dir = tempfile.mkdtemp(prefix='rt-genpatch', dir='debian') diff --git a/debian/changelog b/debian/changelog index f481357a0..e5ab8e8dd 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,123 @@ +linux (5.19~rc6-1~exp2) UNRELEASED; urgency=medium + + * [i386] x86/asm/32: Fix ANNOTATE_UNRET_SAFE use on 32-bit (fixes FTBFS) + * [riscv64] Disable COMPAT (fixes FTBFS) + + -- Ben Hutchings <benh@debian.org> Thu, 14 Jul 2022 12:52:40 +0200 + +linux (5.19~rc6-1~exp1) experimental; urgency=medium + + * New upstream release candidate + + [ Ben Hutchings ] + * [mips64el/mips64r2el] Fix package description + * [x86] Add mitigations for RETbleed on AMD/Hygon (CVE-2022-29900) and + Intel (CVE-2022-29901) processors: + - x86/kvm/vmx: Make noinstr clean + - x86/cpufeatures: Move RETPOLINE flags to word 11 + - x86/retpoline: Cleanup some #ifdefery + - x86/retpoline: Swizzle retpoline thunk + - x86/retpoline: Use -mfunction-return + - x86: Undo return-thunk damage + - x86,objtool: Create .return_sites + - x86,static_call: Use alternative RET encoding + - x86/ftrace: Use alternative RET encoding + - x86/bpf: Use alternative RET encoding + - x86/kvm: Fix SETcc emulation for return thunks + - x86/vsyscall_emu/64: Don't use RET in vsyscall emulation + - x86/sev: Avoid using __x86_return_thunk + - x86: Use return-thunk in asm code + - x86/entry: Avoid very early RET + - objtool: Treat .text.__x86.* as noinstr + - x86: Add magic AMD return-thunk + - x86/bugs: Report AMD retbleed vulnerability + - x86/bugs: Add AMD retbleed= boot parameter + - x86/bugs: Enable STIBP for JMP2RET + - x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value + - x86/entry: Add kernel IBRS implementation + - x86/bugs: Optimize SPEC_CTRL MSR writes + - x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS + - 
x86/bugs: Split spectre_v2_select_mitigation() and + spectre_v2_user_select_mitigation() + - x86/bugs: Report Intel retbleed vulnerability + - intel_idle: Disable IBRS during long idle + - objtool: Update Retpoline validation + - x86/xen: Rename SYS* entry points + - x86/xen: Add UNTRAIN_RET + - x86/bugs: Add retbleed=ibpb + - x86/bugs: Do IBPB fallback check only once + - objtool: Add entry UNRET validation + - x86/cpu/amd: Add Spectral Chicken + - x86/speculation: Fix RSB filling with CONFIG_RETPOLINE=n + - x86/speculation: Fix firmware entry SPEC_CTRL handling + - x86/speculation: Fix SPEC_CTRL write on SMT state change + - x86/speculation: Use cached host SPEC_CTRL value for guest entry/exit + - x86/speculation: Remove x86_spec_ctrl_mask + - objtool: Re-add UNWIND_HINT_{SAVE_RESTORE} + - KVM: VMX: Flatten __vmx_vcpu_run() + - KVM: VMX: Convert launched argument to flags + - KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS + - KVM: VMX: Fix IBRS handling after vmexit + - x86/speculation: Fill RSB on vmexit for IBRS + - KVM: VMX: Prevent RSB underflow before vmenter + - x86/common: Stamp out the stepping madness + - x86/cpu/amd: Enumerate BTC_NO + - x86/retbleed: Add fine grained Kconfig knobs + - x86/bugs: Add Cannon lake to RETBleed affected CPU list + - x86/entry: Move PUSH_AND_CLEAR_REGS() back into error_entry + - x86/bugs: Do not enable IBPB-on-entry when IBPB is not supported + - x86/kexec: Disable RET on kexec + - x86/speculation: Disable RRSBA behavior + - x86/static_call: Serialize __static_call_fixup() properly + * [amd64] Enable SLS mitigation + + -- Ben Hutchings <benh@debian.org> Wed, 13 Jul 2022 01:20:17 +0200 + +linux (5.19~rc4-1~exp1) experimental; urgency=medium + + * New upstream release candidate + + [ Johannes Schauer Marin Rodrigues ] + * [arm64] Add pwm-imx27, nwl-dsi, ti-sn65dsi86, imx-dcss, mxsfb, mux-mmio + and imx8mq-interconnect to fb-modules udeb for the MNT Reform 2. 
+ + [ Julien Massot ] + * [arm64] Enable Qualcomm SDA845 SoC: + - Enable QCOM_CLK_RPMH, SDM_GCC_845, QCOM_HFPLL, INTERCONNECT_QCOM, + INTERCONNECT_QCOM_SDM845, QCOM_PDC, PINCTRL_MSM, PINCTRL_SDM845, + RESET_QCOM_AOSS, QCOM_AOSS_QMP, QCOM_COMMAND_DB, QCOM_GENI_SE, + QCOM_RPMH, QCOM_RPMHPD, QCOM_RPMPD, SERIAL_QCOM_GENI, + SERIAL_QCOM_GENI_CONSOLE as built-in + - Enable SDM_CAMCC_845, SDM_GPUCC_845, SDM_VIDEOCC_845, SDM_DISPCC_845, + ARM_QCOM_CPUFREQ_HW, REGULATOR_QCOM_RPMH, SCSI_UFSHCD_PLATFORM, + SCSI_UFS_QCOM as modules + + [ Diederik de Haas ] + * [armel/rpi] drivers/char/hw_random: Enable HW_RANDOM_BCM2835 as module + * [armel/rpi] drivers/rtc: Enable RTC_DRV_DS1307 as module (Closes: #958904) + + [ Zhang Ning ] + * [arm64] enable gpio polled keyboard which is needed for Khadas SBCs + * [arm64] media: enable Amlogic CEC, GE2D, vdec, and Rockchip RGA, ISP1 + * [arm64] phy: enable Rk3399 Mipi phy which is needed by RK3399 ISP1 + * [arm64] enable GPIO ir receirer which is needed by Khadas edge SBC + * [arm64] enable HYM8563 RTC driver which is needed by Khadas SBCs + + [ Ben Hutchings ] + * Refresh our patches for 5.19 + * [rt] Update to 5.19-rc3-rt5 + * Update config for 5.19 + * virtio: disable notification hardening by default (regression in 5.19-rc1) + * [rt] printk: Reinstate deferred printing with kthreads + * d/bin: Use subprocess text mode support instead of decoding separately + * d/bin, d/lib/python: Use open() with encoding instead of codecs.open() + * d/lib/python/debian_linux: Introduce {Source,Binary}Package classes + * d/lib/python/debian_linux: Add all used fields to {Source,Binary}Package + * d/lib/python/debian_linux: Warn about unknown fields in control files + * firmware_loader: Enable FW_LOADER_COMPRESS, FW_LOADER_COMPRESS_XZ + + -- Ben Hutchings <benh@debian.org> Fri, 01 Jul 2022 18:01:46 +0200 + linux (5.18.8-1) UNRELEASED; urgency=medium * New upstream stable update: diff --git a/debian/config/alpha/config b/debian/config/alpha/config index 
0eacc09da..e7f006e85 100644 --- a/debian/config/alpha/config +++ b/debian/config/alpha/config @@ -100,16 +100,9 @@ CONFIG_ATM_LANAI=m CONFIG_ATM_ENI=m # CONFIG_ATM_ENI_DEBUG is not set # CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set CONFIG_ATM_IDT77252=m # CONFIG_ATM_IDT77252_DEBUG is not set # CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set # CONFIG_ATM_FORE200E_USE_TASKLET is not set CONFIG_ATM_FORE200E_TX_RETRY=16 CONFIG_ATM_FORE200E_DEBUG=0 @@ -494,7 +487,6 @@ CONFIG_ADAPTEC_STARFIRE=m CONFIG_AMD8111_ETH=m CONFIG_LANCE=m CONFIG_PCMCIA_NMCLAN=m -CONFIG_NI65=m ## ## file: drivers/net/ethernet/broadcom/Kconfig @@ -514,7 +506,6 @@ CONFIG_DE2104X=m CONFIG_TULIP=m # CONFIG_TULIP_MWI is not set # CONFIG_TULIP_MMIO is not set -CONFIG_DE4X5=m CONFIG_WINBOND_840=m CONFIG_DM9102=m CONFIG_PCMCIA_XIRCOM=m @@ -602,10 +593,6 @@ CONFIG_PLIP=m ## file: drivers/net/wan/Kconfig ## CONFIG_WAN=y -CONFIG_HOSTESS_SV11=m -CONFIG_COSA=m -CONFIG_LANMEDIA=m -CONFIG_SEALEVEL_4021=m CONFIG_PCI200SYN=m CONFIG_WANXL=m CONFIG_N2=m diff --git a/debian/config/amd64/config b/debian/config/amd64/config index 15cdba3be..2585f9a02 100644 --- a/debian/config/amd64/config +++ b/debian/config/amd64/config @@ -26,12 +26,11 @@ CONFIG_KEXEC_SIG=y CONFIG_KEXEC_BZIMAGE_VERIFY_SIG=y CONFIG_RANDOMIZE_MEMORY=y ## choice: vsyscall table for legacy applications -# CONFIG_LEGACY_VSYSCALL_EMULATE is not set CONFIG_LEGACY_VSYSCALL_NONE=y ## end choice +CONFIG_SLS=y CONFIG_PCI_MMCONFIG=y CONFIG_ISA_DMA_API=y -# CONFIG_IA32_AOUT is not set CONFIG_X86_X32_ABI=y CONFIG_X86_X32_DISABLED=y @@ -106,7 +105,6 @@ CONFIG_EDAC_SBRIDGE=m ## ## file: drivers/gpu/drm/i915/Kconfig ## -CONFIG_DRM_I915_GVT=y CONFIG_DRM_I915_GVT_KVMGT=m ## diff --git a/debian/config/arm64/config b/debian/config/arm64/config index 0ba5a2d66..5e9137ffc 100644 --- 
a/debian/config/arm64/config +++ b/debian/config/arm64/config @@ -199,9 +199,16 @@ CONFIG_CLK_IMX8MQ=y CONFIG_COMMON_CLK_QCOM=y CONFIG_QCOM_CLK_RPM=m CONFIG_QCOM_CLK_SMD_RPM=m +CONFIG_QCOM_CLK_RPMH=y CONFIG_MSM_GCC_8916=y CONFIG_MSM_GCC_8996=y CONFIG_MSM_MMCC_8996=y +CONFIG_SDM_CAMCC_845=m +CONFIG_SDM_GCC_845=y +CONFIG_SDM_GPUCC_845=m +CONFIG_SDM_VIDEOCC_845=m +CONFIG_SDM_DISPCC_845=m +CONFIG_QCOM_HFPLL=y ## ## file: drivers/clk/sunxi-ng/Kconfig @@ -230,6 +237,7 @@ CONFIG_ACPI_CPPC_CPUFREQ=m CONFIG_ARM_ARMADA_37XX_CPUFREQ=m CONFIG_ARM_SCPI_CPUFREQ=m CONFIG_ARM_IMX_CPUFREQ_DT=m +CONFIG_ARM_QCOM_CPUFREQ_HW=m CONFIG_ARM_RASPBERRYPI_CPUFREQ=m ## @@ -605,6 +613,7 @@ CONFIG_INFINIBAND_HNS_HIP08=y ## CONFIG_KEYBOARD_ADC=m CONFIG_KEYBOARD_GPIO=m +CONFIG_KEYBOARD_GPIO_POLLED=m CONFIG_KEYBOARD_SNVS_PWRKEY=m CONFIG_KEYBOARD_TEGRA=m CONFIG_KEYBOARD_CROS_EC=m @@ -646,6 +655,12 @@ CONFIG_INTERCONNECT_IMX=m CONFIG_INTERCONNECT_IMX8MQ=m ## +## file: drivers/interconnect/qcom/Kconfig +## +CONFIG_INTERCONNECT_QCOM=y +CONFIG_INTERCONNECT_QCOM_SDM845=y + +## ## file: drivers/iommu/Kconfig ## CONFIG_ROCKCHIP_IOMMU=y @@ -659,6 +674,7 @@ CONFIG_QCOM_IOMMU=y ## CONFIG_SL28CPLD_INTC=y CONFIG_QCOM_IRQ_COMBINER=y +CONFIG_QCOM_PDC=y ## ## file: drivers/leds/Kconfig @@ -685,13 +701,35 @@ CONFIG_QCOM_APCS_IPC=m CONFIG_XGENE_SLIMPRO_MBOX=m ## +## file: drivers/media/cec/platform/Kconfig +## +CONFIG_CEC_MESON_AO=m +CONFIG_CEC_MESON_G12A_AO=m + +## +## file: drivers/media/platform/amlogic/meson-ge2d/Kconfig +## +CONFIG_VIDEO_MESON_GE2D=m + +## ## file: drivers/media/platform/qcom/venus/Kconfig ## CONFIG_VIDEO_QCOM_VENUS=m ## +## file: drivers/media/platform/rockchip/rga/Kconfig +## +CONFIG_VIDEO_ROCKCHIP_RGA=m + +## +## file: drivers/media/platform/rockchip/rkisp1/Kconfig +## +CONFIG_VIDEO_ROCKCHIP_ISP1=m + +## ## file: drivers/media/rc/Kconfig ## +CONFIG_IR_GPIO_CIR=m CONFIG_IR_MESON=m ## @@ -1170,6 +1208,7 @@ CONFIG_PHY_QCOM_USB_HSIC=m ## file: drivers/phy/rockchip/Kconfig ## 
CONFIG_PHY_ROCKCHIP_DP=m +CONFIG_PHY_ROCKCHIP_DPHY_RX0=m CONFIG_PHY_ROCKCHIP_EMMC=m CONFIG_PHY_ROCKCHIP_INNO_HDMI=m CONFIG_PHY_ROCKCHIP_INNO_USB2=m @@ -1212,6 +1251,7 @@ CONFIG_PINCTRL_MSM8916=y CONFIG_PINCTRL_MSM8996=y CONFIG_PINCTRL_QCOM_SPMI_PMIC=y CONFIG_PINCTRL_QCOM_SSBI_PMIC=y +CONFIG_PINCTRL_SDM845=y ## ## file: drivers/platform/chrome/Kconfig @@ -1281,6 +1321,7 @@ CONFIG_REGULATOR_PCA9450=m CONFIG_REGULATOR_PFUZE100=m CONFIG_REGULATOR_PWM=m CONFIG_REGULATOR_QCOM_RPM=m +CONFIG_REGULATOR_QCOM_RPMH=m CONFIG_REGULATOR_QCOM_SMD_RPM=m CONFIG_REGULATOR_QCOM_SPMI=m CONFIG_REGULATOR_RASPBERRYPI_TOUCHSCREEN_ATTINY=m @@ -1299,6 +1340,7 @@ CONFIG_QCOM_Q6V5_MSS=m ## CONFIG_RESET_CONTROLLER=y CONFIG_RESET_IMX7=m +CONFIG_RESET_QCOM_AOSS=y ## ## file: drivers/rpmsg/Kconfig @@ -1311,6 +1353,7 @@ CONFIG_RPMSG_QCOM_SMD=m ## file: drivers/rtc/Kconfig ## CONFIG_RTC_DRV_DS1307=y +CONFIG_RTC_DRV_HYM8563=m CONFIG_RTC_DRV_MAX77686=y CONFIG_RTC_DRV_RK808=y CONFIG_RTC_DRV_PCF8523=m @@ -1356,7 +1399,13 @@ CONFIG_DPAA2_CONSOLE=m ## ## file: drivers/soc/qcom/Kconfig ## +CONFIG_QCOM_AOSS_QMP=y +CONFIG_QCOM_COMMAND_DB=y +CONFIG_QCOM_GENI_SE=y CONFIG_QCOM_GSBI=m +CONFIG_QCOM_RPMH=y +CONFIG_QCOM_RPMHPD=y +CONFIG_QCOM_RPMPD=y CONFIG_QCOM_SMEM=m CONFIG_QCOM_SMD_RPM=m CONFIG_QCOM_SMP2P=m @@ -1412,6 +1461,11 @@ CONFIG_STAGING_MEDIA=y CONFIG_VIDEO_HANTRO=m ## +## file: drivers/staging/media/meson/vdec/Kconfig +## +CONFIG_VIDEO_MESON_VDEC=m + +## ## file: drivers/staging/media/rkvdec/Kconfig ## CONFIG_VIDEO_ROCKCHIP_VDEC=m @@ -1494,6 +1548,8 @@ CONFIG_SERIAL_IMX=y CONFIG_SERIAL_IMX_CONSOLE=y CONFIG_SERIAL_MSM=y CONFIG_SERIAL_MSM_CONSOLE=y +CONFIG_SERIAL_QCOM_GENI=y +CONFIG_SERIAL_QCOM_GENI_CONSOLE=y CONFIG_SERIAL_XILINX_PS_UART=y CONFIG_SERIAL_XILINX_PS_UART_CONSOLE=y CONFIG_SERIAL_FSL_LPUART=y @@ -1517,6 +1573,12 @@ CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y ## +## file: drivers/ufs/host/Kconfig +## +CONFIG_SCSI_UFSHCD_PLATFORM=m +CONFIG_SCSI_UFS_QCOM=m + +## ## file: 
drivers/usb/chipidea/Kconfig ## CONFIG_USB_CHIPIDEA=m diff --git a/debian/config/arm64/config.cloud-arm64 b/debian/config/arm64/config.cloud-arm64 index 9091c3b39..06ebdbe2e 100644 --- a/debian/config/arm64/config.cloud-arm64 +++ b/debian/config/arm64/config.cloud-arm64 @@ -91,6 +91,11 @@ CONFIG_ACPI_CPPC_CPUFREQ=m CONFIG_ACPI_I2C_OPREGION=y ## +## file: drivers/input/keyboard/Kconfig +## +# CONFIG_KEYBOARD_GPIO_POLLED is not set + +## ## file: drivers/power/reset/Kconfig ## CONFIG_POWER_RESET=y diff --git a/debian/config/armel/config.marvell b/debian/config/armel/config.marvell index d9b4b24bc..eb6ad6050 100644 --- a/debian/config/armel/config.marvell +++ b/debian/config/armel/config.marvell @@ -718,9 +718,6 @@ CONFIG_NLS=m ## choice: Compiler optimization level CONFIG_CC_OPTIMIZE_FOR_SIZE=y ## end choice -# CONFIG_USERFAULTFD is not set -# CONFIG_SLUB_DEBUG is not set -# CONFIG_SLAB_FREELIST_HARDENED is not set # CONFIG_PROFILING is not set # CONFIG_MODULE_SIG is not set @@ -753,17 +750,20 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y ## ## file: mm/Kconfig ## +# CONFIG_ZSWAP is not set +# CONFIG_SLAB_FREELIST_HARDENED is not set ## choice: Memory model CONFIG_FLATMEM_MANUAL=y # CONFIG_SPARSEMEM_MANUAL is not set ## end choice # CONFIG_KSM is not set -# CONFIG_ZSWAP is not set +# CONFIG_USERFAULTFD is not set ## ## file: mm/Kconfig.debug ## # CONFIG_PAGE_EXTENSION is not set +# CONFIG_SLUB_DEBUG is not set # CONFIG_PAGE_POISONING is not set ## diff --git a/debian/config/armel/config.rpi b/debian/config/armel/config.rpi index 416bb1ba6..4fc67304a 100644 --- a/debian/config/armel/config.rpi +++ b/debian/config/armel/config.rpi @@ -17,6 +17,11 @@ CONFIG_ARCH_BCM2835=y CONFIG_BT_HCIUART=m ## +## file: drivers/char/hw_random/Kconfig +## +CONFIG_HW_RANDOM_BCM2835=m + +## ## file: drivers/clk/bcm/Kconfig ## CONFIG_CLK_RASPBERRYPI=y @@ -102,6 +107,11 @@ CONFIG_PWM=y CONFIG_PWM_BCM2835=y ## +## file: drivers/rtc/Kconfig +## +CONFIG_RTC_DRV_DS1307=m + +## ## file: 
drivers/soc/bcm/Kconfig ## CONFIG_RASPBERRYPI_POWER=y diff --git a/debian/config/armhf/config b/debian/config/armhf/config index 2a11d7ded..f8c57fad4 100644 --- a/debian/config/armhf/config +++ b/debian/config/armhf/config @@ -48,7 +48,9 @@ CONFIG_CRYPTO_AES_ARM_CE=m CONFIG_CRYPTO_GHASH_ARM_CE=m CONFIG_CRYPTO_CRCT10DIF_ARM_CE=m CONFIG_CRYPTO_CRC32_ARM_CE=m +CONFIG_CRYPTO_CHACHA20_NEON=m CONFIG_CRYPTO_NHPOLY1305_NEON=m +CONFIG_CRYPTO_CURVE25519_NEON=m ## ## file: arch/arm/mach-aspeed/Kconfig @@ -116,6 +118,10 @@ CONFIG_MACH_DOVE=y CONFIG_OMAP_MUX=y # CONFIG_OMAP_MUX_DEBUG is not set CONFIG_OMAP_MUX_WARNINGS=y +CONFIG_OMAP_32K_TIMER=y +# CONFIG_OMAP_MPU_TIMER is not set +CONFIG_OMAP_SERIAL_WAKE=y +CONFIG_OMAP_RESET_CLOCKS=y ## ## file: arch/arm/mach-omap2/Kconfig @@ -127,6 +133,9 @@ CONFIG_SOC_OMAP5=y CONFIG_SOC_AM33XX=y CONFIG_SOC_DRA7XX=y CONFIG_ARCH_OMAP2PLUS_TYPICAL=y +CONFIG_POWER_AVS_OMAP=y +CONFIG_POWER_AVS_OMAP_CLASS3=y +# CONFIG_OMAP3_L2_AUX_SECURE_SAVE_RESTORE is not set CONFIG_SOC_OMAP3430=y CONFIG_SOC_TI81XX=y @@ -156,7 +165,7 @@ CONFIG_ARCH_SUNXI=y CONFIG_ARCH_TEGRA=y ## -## file: arch/arm/mach-vexpress/Kconfig +## file: arch/arm/mach-versatile/Kconfig ## CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_VEXPRESS_CORTEX_A5_A9_ERRATA=y @@ -173,17 +182,6 @@ CONFIG_ARM_THUMBEE=y CONFIG_CACHE_TAUROS2=y ## -## file: arch/arm/plat-omap/Kconfig -## -CONFIG_POWER_AVS_OMAP=y -CONFIG_POWER_AVS_OMAP_CLASS3=y -CONFIG_OMAP_RESET_CLOCKS=y -# CONFIG_OMAP_MPU_TIMER is not set -CONFIG_OMAP_32K_TIMER=y -# CONFIG_OMAP3_L2_AUX_SECURE_SAVE_RESTORE is not set -CONFIG_OMAP_SERIAL_WAKE=y - -## ## file: drivers/android/Kconfig ## CONFIG_ANDROID=y @@ -856,11 +854,6 @@ CONFIG_MTD_NAND_SUNXI=m CONFIG_MTD_NAND_STM32_FMC2=m ## -## file: drivers/mtd/spi-nor/controllers/Kconfig -## -CONFIG_SPI_ASPEED_SMC=m - -## ## file: drivers/mux/Kconfig ## CONFIG_MUX_MMIO=m @@ -1325,6 +1318,7 @@ CONFIG_ARCH_TEGRA_124_SOC=y ## file: drivers/spi/Kconfig ## CONFIG_SPI=y +CONFIG_SPI_ASPEED_SMC=m CONFIG_SPI_BCM2835=m 
CONFIG_SPI_BCM2835AUX=m CONFIG_SPI_GPIO=y diff --git a/debian/config/config b/debian/config/config index c225f1e25..6ddac3947 100644 --- a/debian/config/config +++ b/debian/config/config @@ -123,7 +123,7 @@ CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=m CONFIG_CRYPTO_SHA3=m -# CONFIG_CRYPTO_SM3 is not set +# CONFIG_CRYPTO_SM3_GENERIC is not set CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_AES=m CONFIG_CRYPTO_AES_TI=m @@ -139,7 +139,7 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_CHACHA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m -# CONFIG_CRYPTO_SM4 is not set +# CONFIG_CRYPTO_SM4_GENERIC is not set CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_DEFLATE=m @@ -347,6 +347,9 @@ CONFIG_ALLOW_DEV_COREDUMP=y CONFIG_FW_LOADER=y CONFIG_EXTRA_FIRMWARE="" # CONFIG_FW_LOADER_USER_HELPER_FALLBACK is not set +CONFIG_FW_LOADER_COMPRESS=y +CONFIG_FW_LOADER_COMPRESS_XZ=y +# CONFIG_FW_LOADER_COMPRESS_ZSTD is not set ## ## file: drivers/base/test/Kconfig @@ -703,7 +706,6 @@ CONFIG_GPIO_VIPERBOARD=m ## ## file: drivers/gpu/drm/Kconfig ## -CONFIG_DRM_DP_AUX_CHARDEV=y # CONFIG_DRM_DEBUG_SELFTEST is not set CONFIG_DRM_FBDEV_EMULATION=y CONFIG_DRM_FBDEV_OVERALLOC=100 @@ -755,6 +757,11 @@ CONFIG_DRM_AMD_DC_SI=y # CONFIG_DRM_ANALOGIX_ANX78XX is not set ## +## file: drivers/gpu/drm/display/Kconfig +## +CONFIG_DRM_DP_AUX_CHARDEV=y + +## ## file: drivers/gpu/drm/hisilicon/hibmc/Kconfig ## # CONFIG_DRM_HISI_HIBMC is not set @@ -3118,7 +3125,6 @@ CONFIG_NET_FAILOVER=m ## CONFIG_ATALK=m CONFIG_DEV_APPLETALK=y -CONFIG_LTPC=m CONFIG_IPDDP=m CONFIG_IPDDP_ENCAP=y @@ -3130,7 +3136,6 @@ CONFIG_CAN_VXCAN=m CONFIG_CAN_SLCAN=m CONFIG_CAN_DEV=m CONFIG_CAN_CALC_BITTIMING=y -# CONFIG_CAN_LEDS is not set # CONFIG_CAN_GRCAN is not set # CONFIG_CAN_DEBUG_DEVICES is not set @@ -3369,7 +3374,6 @@ CONFIG_NET_VENDOR_DEC=y CONFIG_DE2104X_DSL=0 CONFIG_TULIP_NAPI=y CONFIG_TULIP_NAPI_HW_MITIGATION=y -# CONFIG_DE4X5 is not set CONFIG_ULI526X=m ## @@ -3726,7 +3730,6 @@ CONFIG_DEFXX=m CONFIG_MKISS=m 
CONFIG_6PACK=m CONFIG_BPQETHER=m -CONFIG_DMASCC=m CONFIG_SCC=m # CONFIG_SCC_DELAY is not set # CONFIG_SCC_TRXECHO is not set @@ -3742,7 +3745,6 @@ CONFIG_YAM=m CONFIG_IEEE802154_DRIVERS=m CONFIG_IEEE802154_FAKELB=m CONFIG_IEEE802154_AT86RF230=m -# CONFIG_IEEE802154_AT86RF230_DEBUGFS is not set CONFIG_IEEE802154_MRF24J40=m CONFIG_IEEE802154_CC2520=m CONFIG_IEEE802154_ATUSB=m @@ -4907,14 +4909,6 @@ CONFIG_SCSI_QLA_ISCSI=m CONFIG_SCSI_SMARTPQI=m ## -## file: drivers/scsi/ufs/Kconfig -## -CONFIG_SCSI_UFSHCD=m -CONFIG_SCSI_UFSHCD_PCI=m -# CONFIG_SCSI_UFS_DWC_TC_PCI is not set -# CONFIG_SCSI_UFSHCD_PLATFORM is not set - -## ## file: drivers/soc/ti/Kconfig ## # CONFIG_SOC_TI is not set @@ -5222,6 +5216,18 @@ CONFIG_SERIAL_8250_HUB6=m CONFIG_SERIAL_OF_PLATFORM=m ## +## file: drivers/ufs/Kconfig +## +CONFIG_SCSI_UFSHCD=m + +## +## file: drivers/ufs/host/Kconfig +## +CONFIG_SCSI_UFSHCD_PCI=m +# CONFIG_SCSI_UFS_DWC_TC_PCI is not set +# CONFIG_SCSI_UFSHCD_PLATFORM is not set + +## ## file: drivers/uio/Kconfig ## CONFIG_UIO=m @@ -6327,7 +6333,6 @@ CONFIG_KERNEL_XZ=y # CONFIG_KERNEL_LZ4 is not set ## end choice CONFIG_DEFAULT_HOSTNAME="(none)" -CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_CROSS_MEMORY_ATTACH=y @@ -6411,26 +6416,12 @@ CONFIG_ADVISE_SYSCALLS=y CONFIG_MEMBARRIER=y CONFIG_KALLSYMS=y # CONFIG_KALLSYMS_ALL is not set -CONFIG_USERFAULTFD=y CONFIG_RSEQ=y # CONFIG_DEBUG_RSEQ is not set # CONFIG_EMBEDDED is not set # CONFIG_PC104 is not set CONFIG_PERF_EVENTS=y # CONFIG_DEBUG_PERF_USE_VMALLOC is not set -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_SLUB_DEBUG=y -# CONFIG_COMPAT_BRK is not set -## choice: Choose SLAB allocator -# CONFIG_SLAB is not set -CONFIG_SLUB=y -# CONFIG_SLOB is not set -## end choice -CONFIG_SLAB_MERGE_DEFAULT=y -CONFIG_SLAB_FREELIST_RANDOM=y -CONFIG_SLAB_FREELIST_HARDENED=y -CONFIG_SLUB_CPU_PARTIAL=y -# CONFIG_MMAP_ALLOW_UNINITIALIZED is not set CONFIG_PROFILING=y CONFIG_MODULES=y CONFIG_MODULE_FORCE_LOAD=y @@ -6611,8 +6602,6 @@ 
CONFIG_MAGIC_SYSRQ=y CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE=0x01b6 CONFIG_DEBUG_FS=y # CONFIG_DEBUG_OBJECTS is not set -# CONFIG_SLUB_DEBUG_ON is not set -# CONFIG_SLUB_STATS is not set # CONFIG_DEBUG_KMEMLEAK is not set CONFIG_SCHED_STACK_END_CHECK=y # CONFIG_DEBUG_VM is not set @@ -6739,6 +6728,24 @@ CONFIG_CORDIC=m ## ## file: mm/Kconfig ## +CONFIG_SWAP=y +CONFIG_ZSWAP=y +CONFIG_ZBUD=y +CONFIG_Z3FOLD=m +CONFIG_ZSMALLOC=m +# CONFIG_ZSMALLOC_STAT is not set +## choice: Choose SLAB allocator +# CONFIG_SLAB is not set +CONFIG_SLUB=y +# CONFIG_SLOB is not set +## end choice +CONFIG_SLAB_MERGE_DEFAULT=y +CONFIG_SLAB_FREELIST_RANDOM=y +CONFIG_SLAB_FREELIST_HARDENED=y +# CONFIG_SLUB_STATS is not set +CONFIG_SLUB_CPU_PARTIAL=y +# CONFIG_COMPAT_BRK is not set +# CONFIG_MMAP_ALLOW_UNINITIALIZED is not set CONFIG_MEMORY_HOTPLUG=y # CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE is not set CONFIG_MEMORY_HOTREMOVE=y @@ -6757,20 +6764,19 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y ## end choice # CONFIG_CMA is not set CONFIG_MEM_SOFT_DIRTY=y -CONFIG_ZSWAP=y -CONFIG_ZBUD=y -CONFIG_Z3FOLD=m -CONFIG_ZSMALLOC=m -# CONFIG_ZSMALLOC_STAT is not set CONFIG_DEFERRED_STRUCT_PAGE_INIT=y # CONFIG_IDLE_PAGE_TRACKING is not set +CONFIG_VM_EVENT_COUNTERS=y # CONFIG_PERCPU_STATS is not set +CONFIG_USERFAULTFD=y ## ## file: mm/Kconfig.debug ## CONFIG_PAGE_EXTENSION=y # CONFIG_DEBUG_PAGEALLOC is not set +CONFIG_SLUB_DEBUG=y +# CONFIG_SLUB_DEBUG_ON is not set # CONFIG_PAGE_OWNER is not set CONFIG_PAGE_POISONING=y # CONFIG_DEBUG_PAGE_REF is not set @@ -7703,7 +7709,6 @@ CONFIG_SECURITY_PATH=y # CONFIG_INTEL_TXT is not set CONFIG_LSM_MMAP_MIN_ADDR=32768 CONFIG_HARDENED_USERCOPY=y -# CONFIG_HARDENED_USERCOPY_PAGESPAN is not set CONFIG_FORTIFY_SOURCE=y ## choice: First legacy 'major LSM' to be initialized CONFIG_DEFAULT_SECURITY_APPARMOR=y @@ -8060,7 +8065,8 @@ CONFIG_SND_SOC_RT715_SDW=m # CONFIG_SND_SOC_WM8580 is not set # CONFIG_SND_SOC_WM8711 is not set # CONFIG_SND_SOC_WM8728 is not set -# CONFIG_SND_SOC_WM8731 is 
not set +# CONFIG_SND_SOC_WM8731_I2C is not set +# CONFIG_SND_SOC_WM8731_SPI is not set # CONFIG_SND_SOC_WM8737 is not set # CONFIG_SND_SOC_WM8741 is not set # CONFIG_SND_SOC_WM8750 is not set @@ -8165,3 +8171,4 @@ CONFIG_RD_LZMA=y CONFIG_RD_XZ=y CONFIG_RD_LZO=y CONFIG_RD_LZ4=y +CONFIG_RD_ZSTD=y diff --git a/debian/config/config.cloud b/debian/config/config.cloud index 7255dbf76..c8634de31 100644 --- a/debian/config/config.cloud +++ b/debian/config/config.cloud @@ -1204,11 +1204,6 @@ CONFIG_HYPERV_STORAGE=m # CONFIG_SCSI_SMARTPQI is not set ## -## file: drivers/scsi/ufs/Kconfig -## -# CONFIG_SCSI_UFSHCD is not set - -## ## file: drivers/spi/Kconfig ## # CONFIG_SPI is not set @@ -1266,6 +1261,11 @@ CONFIG_HYPERV_STORAGE=m # CONFIG_SERIAL_8250_MID is not set ## +## file: drivers/ufs/Kconfig +## +# CONFIG_SCSI_UFSHCD is not set + +## ## file: drivers/usb/Kconfig ## # CONFIG_USB_SUPPORT is not set diff --git a/debian/config/defines b/debian/config/defines index 7ebf20428..0d0697432 100644 --- a/debian/config/defines +++ b/debian/config/defines @@ -1,5 +1,5 @@ [abi] -abiname: 3 +abiname: trunk ignore-changes: __cpuhp_* __udp_gso_segment diff --git a/debian/config/hppa/config b/debian/config/hppa/config index 9eb3b22ac..c595f65c8 100644 --- a/debian/config/hppa/config +++ b/debian/config/hppa/config @@ -199,7 +199,6 @@ CONFIG_ADAPTEC_STARFIRE=m CONFIG_AMD8111_ETH=m # CONFIG_LANCE is not set CONFIG_PCMCIA_NMCLAN=m -# CONFIG_NI65 is not set ## ## file: drivers/net/ethernet/broadcom/Kconfig diff --git a/debian/config/i386/config b/debian/config/i386/config index 358026d3e..6880da062 100644 --- a/debian/config/i386/config +++ b/debian/config/i386/config @@ -297,7 +297,6 @@ CONFIG_WD80x3=m ## file: drivers/net/ethernet/amd/Kconfig ## CONFIG_LANCE=m -CONFIG_NI65=m ## ## file: drivers/net/ethernet/cirrus/Kconfig @@ -323,9 +322,6 @@ CONFIG_SMC9194=m ## ## file: drivers/net/wan/Kconfig ## -CONFIG_HOSTESS_SV11=m -CONFIG_COSA=m -CONFIG_SEALEVEL_4021=m CONFIG_N2=m CONFIG_C101=m diff 
--git a/debian/config/ia64/config b/debian/config/ia64/config index 9fd977423..92683aabd 100644 --- a/debian/config/ia64/config +++ b/debian/config/ia64/config @@ -450,7 +450,6 @@ CONFIG_PLIP=m ## file: drivers/net/wan/Kconfig ## CONFIG_WAN=y -CONFIG_LANMEDIA=m CONFIG_PCI200SYN=m CONFIG_WANXL=m CONFIG_FARSYNC=m diff --git a/debian/config/kernelarch-arm/config b/debian/config/kernelarch-arm/config index 903ca5a91..4ebde3f86 100644 --- a/debian/config/kernelarch-arm/config +++ b/debian/config/kernelarch-arm/config @@ -28,6 +28,7 @@ CONFIG_CRYPTO_SHA256_ARM=m CONFIG_CRYPTO_SHA512_ARM=m CONFIG_CRYPTO_BLAKE2S_ARM=m CONFIG_CRYPTO_AES_ARM=m +CONFIG_CRYPTO_POLY1305_ARM=m ## ## file: arch/arm/mm/Kconfig diff --git a/debian/config/kernelarch-powerpc/config b/debian/config/kernelarch-powerpc/config index 7d86773e2..34bc25a2d 100644 --- a/debian/config/kernelarch-powerpc/config +++ b/debian/config/kernelarch-powerpc/config @@ -107,19 +107,12 @@ CONFIG_ATM_LANAI=m CONFIG_ATM_ENI=m # CONFIG_ATM_ENI_DEBUG is not set # CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -CONFIG_ATM_ZATM_DEBUG=y CONFIG_ATM_NICSTAR=m CONFIG_ATM_NICSTAR_USE_SUNI=y CONFIG_ATM_NICSTAR_USE_IDT77105=y CONFIG_ATM_IDT77252=m # CONFIG_ATM_IDT77252_DEBUG is not set # CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set CONFIG_ATM_IA=m # CONFIG_ATM_IA_DEBUG is not set CONFIG_ATM_FORE200E_USE_TASKLET=y @@ -555,7 +548,6 @@ CONFIG_HIPPI=y ## file: drivers/net/wan/Kconfig ## CONFIG_WAN=y -CONFIG_LANMEDIA=m CONFIG_PCI200SYN=m CONFIG_WANXL=m CONFIG_FARSYNC=m diff --git a/debian/config/kernelarch-powerpc/config-arch-64-be b/debian/config/kernelarch-powerpc/config-arch-64-be index 363d14c5b..f93080b6f 100644 --- a/debian/config/kernelarch-powerpc/config-arch-64-be +++ b/debian/config/kernelarch-powerpc/config-arch-64-be @@ -1,6 +1,7 @@ ## ## file: arch/powerpc/Kconfig ## 
+CONFIG_COMPAT=y CONFIG_CMDLINE="console=hvsi0 console=hvc0 console=ttyS0,9600 console=tty0" ## diff --git a/debian/config/kernelarch-x86/config b/debian/config/kernelarch-x86/config index a743bd4f0..e17ff56e1 100644 --- a/debian/config/kernelarch-x86/config +++ b/debian/config/kernelarch-x86/config @@ -8,7 +8,6 @@ CONFIG_SECCOMP=y ## file: arch/x86/Kconfig ## CONFIG_X86_MPPARSE=y -CONFIG_RETPOLINE=y CONFIG_X86_CPU_RESCTRL=y # CONFIG_X86_EXTENDED_PLATFORM is not set CONFIG_X86_INTEL_LPSS=y @@ -52,7 +51,6 @@ CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT=0 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT=1 CONFIG_X86_PAT=y CONFIG_ARCH_RANDOM=y -CONFIG_X86_SMAP=y CONFIG_X86_UMIP=y CONFIG_EFI=y CONFIG_EFI_STUB=y @@ -66,6 +64,7 @@ CONFIG_RANDOMIZE_BASE=y # CONFIG_COMPAT_VDSO is not set # CONFIG_CMDLINE_BOOL is not set CONFIG_MODIFY_LDT_SYSCALL=y +CONFIG_RETPOLINE=y # CONFIG_PCI_CNB20LE_QUIRK is not set # CONFIG_ISA_BUS is not set CONFIG_IA32_EMULATION=y @@ -243,16 +242,9 @@ CONFIG_ATM_LANAI=m CONFIG_ATM_ENI=m # CONFIG_ATM_ENI_DEBUG is not set # CONFIG_ATM_ENI_TUNE_BURST is not set -CONFIG_ATM_FIRESTREAM=m -CONFIG_ATM_ZATM=m -# CONFIG_ATM_ZATM_DEBUG is not set CONFIG_ATM_IDT77252=m # CONFIG_ATM_IDT77252_DEBUG is not set # CONFIG_ATM_IDT77252_RCV_ALL is not set -CONFIG_ATM_AMBASSADOR=m -# CONFIG_ATM_AMBASSADOR_DEBUG is not set -CONFIG_ATM_HORIZON=m -# CONFIG_ATM_HORIZON_DEBUG is not set # CONFIG_ATM_FORE200E_USE_TASKLET is not set CONFIG_ATM_FORE200E_TX_RETRY=16 CONFIG_ATM_FORE200E_DEBUG=0 @@ -573,7 +565,6 @@ CONFIG_GPIO_ML_IOH=m ## file: drivers/gpu/drm/Kconfig ## CONFIG_DRM=m -CONFIG_DRM_DP_CEC=y CONFIG_DRM_RADEON=m CONFIG_DRM_AMDGPU=m CONFIG_DRM_HYPERV=m @@ -594,6 +585,11 @@ CONFIG_HSA_AMD=y CONFIG_DRM_AST=m ## +## file: drivers/gpu/drm/display/Kconfig +## +CONFIG_DRM_DP_CEC=y + +## ## file: drivers/gpu/drm/gma500/Kconfig ## CONFIG_DRM_GMA500=m @@ -1278,7 +1274,6 @@ CONFIG_PLIP=m ## file: drivers/net/wan/Kconfig ## CONFIG_WAN=y -CONFIG_LANMEDIA=m CONFIG_PCI200SYN=m CONFIG_WANXL=m 
CONFIG_FARSYNC=m @@ -1484,8 +1479,8 @@ CONFIG_PANASONIC_LAPTOP=m CONFIG_SONY_LAPTOP=m CONFIG_SONYPI_COMPAT=y CONFIG_TOPSTAR_LAPTOP=m -CONFIG_X86_ANDROID_TABLETS=m CONFIG_SERIAL_MULTI_INSTANTIATE=m +CONFIG_X86_ANDROID_TABLETS=m CONFIG_INTEL_IPS=m CONFIG_SIEMENS_SIMATIC_IPC=m @@ -1908,10 +1903,10 @@ CONFIG_MACHZ_WDT=m CONFIG_INTEL_MEI_WDT=m CONFIG_NI903X_WDT=m CONFIG_NIC7018_WDT=m +CONFIG_SIEMENS_SIMATIC_IPC_WDT=m CONFIG_PCIPCWATCHDOG=m CONFIG_WDTPCI=m CONFIG_USBPCWATCHDOG=m -CONFIG_SIEMENS_SIMATIC_IPC_WDT=m ## ## file: drivers/xen/Kconfig diff --git a/debian/config/m68k/config b/debian/config/m68k/config index 319c6928f..cb164dc1e 100644 --- a/debian/config/m68k/config +++ b/debian/config/m68k/config @@ -499,11 +499,6 @@ CONFIG_MVME16x_SCSI=m CONFIG_BVME6000_SCSI=m ## -## file: drivers/scsi/ufs/Kconfig -## -# CONFIG_SCSI_UFSHCD is not set - -## ## file: drivers/spi/Kconfig ## # CONFIG_SPI is not set @@ -543,6 +538,11 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=4 # CONFIG_SERIAL_8250_EXTENDED is not set ## +## file: drivers/ufs/Kconfig +## +# CONFIG_SCSI_UFSHCD is not set + +## ## file: drivers/usb/Kconfig ## # CONFIG_USB_SUPPORT is not set @@ -699,7 +699,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y ## end choice CONFIG_SGETMASK_SYSCALL=y CONFIG_SYSFS_SYSCALL=y -CONFIG_COMPAT_BRK=y # CONFIG_MODULE_SIG is not set ## @@ -730,6 +729,7 @@ CONFIG_FONT_PEARL_8x8=y ## ## file: mm/Kconfig ## +CONFIG_COMPAT_BRK=y # CONFIG_COMPACTION is not set # CONFIG_MIGRATION is not set # CONFIG_KSM is not set diff --git a/debian/config/mips64el/defines b/debian/config/mips64el/defines index a3021f94d..7ad27927c 100644 --- a/debian/config/mips64el/defines +++ b/debian/config/mips64el/defines @@ -22,8 +22,8 @@ configs: kernelarch-mips/config.mips64r2 [mips64r2el_description] -hardware: MIPS Generic (mips64r2eb) -hardware-long: MIPS Generic platform (mips64r2eb) +hardware: MIPS Generic (mips64r2el) +hardware-long: MIPS Generic platform (mips64r2el) [mips64r2el_image] configs: diff --git 
a/debian/config/riscv64/config b/debian/config/riscv64/config index 0f65064a3..1a04e34a5 100644 --- a/debian/config/riscv64/config +++ b/debian/config/riscv64/config @@ -9,6 +9,7 @@ CONFIG_SECCOMP=y CONFIG_SMP=y CONFIG_NUMA=y CONFIG_KEXEC=y +# CONFIG_COMPAT is not set ## ## file: arch/riscv/Kconfig.socs diff --git a/debian/config/s390x/config b/debian/config/s390x/config index 213b3889d..40f4d5a85 100644 --- a/debian/config/s390x/config +++ b/debian/config/s390x/config @@ -44,6 +44,15 @@ CONFIG_IBM_PARTITION=y CONFIG_BSD_DISKLABEL=y ## +## file: crypto/Kconfig +## +CONFIG_CRYPTO_SHA512_S390=m +CONFIG_CRYPTO_SHA1_S390=m +CONFIG_CRYPTO_SHA256_S390=m +CONFIG_CRYPTO_AES_S390=m +CONFIG_CRYPTO_DES_S390=m + +## ## file: drivers/block/Kconfig ## # CONFIG_CDROM_PKTCDVD is not set @@ -52,11 +61,6 @@ CONFIG_BSD_DISKLABEL=y ## file: drivers/crypto/Kconfig ## CONFIG_ZCRYPT=m -CONFIG_CRYPTO_SHA1_S390=m -CONFIG_CRYPTO_SHA256_S390=m -CONFIG_CRYPTO_SHA512_S390=m -CONFIG_CRYPTO_DES_S390=m -CONFIG_CRYPTO_AES_S390=m CONFIG_S390_PRNG=m ## diff --git a/debian/installer/modules/arm64/fb-modules b/debian/installer/modules/arm64/fb-modules index 182814376..f9bc179fe 100644 --- a/debian/installer/modules/arm64/fb-modules +++ b/debian/installer/modules/arm64/fb-modules @@ -23,3 +23,12 @@ pwm-rockchip # For HDMI on some rockchip platforms (e.g. 
rock64-rk3328) nvmem-rockchip-efuse phy-rockchip-inno-hdmi + +# For panel/backlight on MNT Reform 2 +pwm-imx27 +nwl-dsi +ti-sn65dsi86 +imx-dcss +mxsfb +mux-mmio +imx8mq-interconnect diff --git a/debian/installer/package-list b/debian/installer/package-list index a745d27bf..b1b3bf0b8 100644 --- a/debian/installer/package-list +++ b/debian/installer/package-list @@ -75,8 +75,6 @@ Description: PATA drivers Package: cdrom-core-modules Depends: kernel-image, scsi-core-modules -Depends_ia64: kernel-image, scsi-core-modules -Depends_m68k: kernel-image, scsi-core-modules Priority: standard Description: CDROM support This package contains core CDROM support for the kernel. diff --git a/debian/lib/python/debian_linux/debian.py b/debian/lib/python/debian_linux/debian.py index 16a878ca7..ffff1ed1c 100644 --- a/debian/lib/python/debian_linux/debian.py +++ b/debian/lib/python/debian_linux/debian.py @@ -4,6 +4,7 @@ import functools import os.path import re import unittest +import warnings from . import utils @@ -729,6 +730,9 @@ class _ControlFileDict(dict): if not isinstance(value, cls): value = cls(value) except KeyError: + warnings.warn( + f'setting unknown field { key } in { type(self).__name__ }', + stacklevel=2) pass super(_ControlFileDict, self).__setitem__(key, value) @@ -750,9 +754,8 @@ class _ControlFileDict(dict): yield self[i] -class Package(_ControlFileDict): +class SourcePackage(_ControlFileDict): _fields = collections.OrderedDict(( - ('Package', str), ('Source', str), ('Architecture', PackageArchitecture), ('Section', str), @@ -763,6 +766,27 @@ class Package(_ControlFileDict): ('Build-Depends', PackageRelation), ('Build-Depends-Arch', PackageRelation), ('Build-Depends-Indep', PackageRelation), + ('Rules-Requires-Root', str), + ('Homepage', str), + ('Vcs-Browser', str), + ('Vcs-Git', str), + )) + + +class BinaryPackage(_ControlFileDict): + _fields = collections.OrderedDict(( + ('Package', str), + ('Package-Type', str), # for udeb only + ('Architecture', 
PackageArchitecture), + ('Section', str), + ('Priority', str), + # Build-Depends* fields aren't allowed for binary packages in + # the real control file, but we move them to the source + # package + ('Build-Depends', PackageRelation), + ('Build-Depends-Arch', PackageRelation), + ('Build-Depends-Indep', PackageRelation), + ('Build-Profiles', PackageBuildRestrictFormula), ('Provides', PackageRelation), ('Pre-Depends', PackageRelation), ('Depends', PackageRelation), @@ -771,8 +795,9 @@ class Package(_ControlFileDict): ('Replaces', PackageRelation), ('Breaks', PackageRelation), ('Conflicts', PackageRelation), + ('Multi-Arch', str), + ('Kernel-Version', str), # for udeb only ('Description', PackageDescription), - ('Build-Profiles', PackageBuildRestrictFormula), )) diff --git a/debian/lib/python/debian_linux/gencontrol.py b/debian/lib/python/debian_linux/gencontrol.py index ace501e9c..d1139f0e2 100644 --- a/debian/lib/python/debian_linux/gencontrol.py +++ b/debian/lib/python/debian_linux/gencontrol.py @@ -1,4 +1,3 @@ -import codecs import os import re from collections import OrderedDict @@ -401,7 +400,7 @@ class Gencontrol(object): self.write_makefile(makefile) def write_control(self, list, name='debian/control'): - self.write_rfc822(codecs.open(name, 'w', 'utf-8'), list) + self.write_rfc822(open(name, 'w', encoding='utf-8'), list) def write_makefile(self, makefile, name='debian/rules.gen'): f = open(name, 'w') diff --git a/debian/lib/python/debian_linux/utils.py b/debian/lib/python/debian_linux/utils.py index a43e98698..ad3258dc4 100644 --- a/debian/lib/python/debian_linux/utils.py +++ b/debian/lib/python/debian_linux/utils.py @@ -1,4 +1,3 @@ -import codecs import os import re import textwrap @@ -23,8 +22,10 @@ class Templates(object): for dir in self.dirs: filename = "%s/%s%s" % (dir, name, suffix) if os.path.exists(filename): - with codecs.open(filename, 'r', 'utf-8') as f: + with open(filename, 'r', encoding='utf-8') as f: mode = os.stat(f.fileno()).st_mode + if name 
== 'control.source': + return (read_control_source(f), mode) if prefix == 'control': return (read_control(f), mode) if prefix == 'tests-control': @@ -51,9 +52,14 @@ class Templates(object): return value[1] +def read_control_source(f): + from .debian import SourcePackage + return _read_rfc822(f, SourcePackage) + + def read_control(f): - from .debian import Package - return _read_rfc822(f, Package) + from .debian import BinaryPackage + return _read_rfc822(f, BinaryPackage) def read_tests_control(f): diff --git a/debian/patches-rt/0001-arm-Disable-jump-label-on-PREEMPT_RT.patch b/debian/patches-rt/0001-arm-Disable-jump-label-on-PREEMPT_RT.patch new file mode 100644 index 000000000..d62ca7ef8 --- /dev/null +++ b/debian/patches-rt/0001-arm-Disable-jump-label-on-PREEMPT_RT.patch @@ -0,0 +1,36 @@ +From: Thomas Gleixner <tglx@linutronix.de> +Date: Wed, 8 Jul 2015 17:14:48 +0200 +Subject: [PATCH 1/2] arm: Disable jump-label on PREEMPT_RT. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz + +jump-labels are used to efficiently switch between two possible code +paths. To achieve this, stop_machine() is used to keep the CPU in a +known state while the opcode is modified. The usage of stop_machine() +here leads to large latency spikes which can be observed on PREEMPT_RT. + +Jump labels may change the target during runtime and are not restricted +to debug or "configuration/ setup" part of a PREEMPT_RT system where +high latencies could be defined as acceptable. + +Disable jump-label support on a PREEMPT_RT system. + +[bigeasy: Patch description.] 
+ +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220613182447.112191-2-bigeasy@linutronix.de +--- + arch/arm/Kconfig | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/arm/Kconfig ++++ b/arch/arm/Kconfig +@@ -71,7 +71,7 @@ config ARM + select HARDIRQS_SW_RESEND + select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT + select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 +- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU ++ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT + select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL + select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL diff --git a/debian/patches-rt/0001-fs-dcache-Disable-preemption-on-i_dir_seq-write-side.patch b/debian/patches-rt/0001-fs-dcache-Disable-preemption-on-i_dir_seq-write-side.patch new file mode 100644 index 000000000..780397c2b --- /dev/null +++ b/debian/patches-rt/0001-fs-dcache-Disable-preemption-on-i_dir_seq-write-side.patch @@ -0,0 +1,61 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Sun, 12 Jun 2022 16:27:28 +0200 +Subject: [PATCH 1/4] fs/dcache: Disable preemption on i_dir_seq write side on + PREEMPT_RT +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz + +i_dir_seq is a sequence counter with a lock which is represented by the +lowest bit. The writer atomically updates the counter which ensures that it +can be modified by only one writer at a time. This requires preemption to +be disabled across the write side critical section. + +On !PREEMPT_RT kernels this is implicit by the caller acquiring +dentry::lock. On PREEMPT_RT kernels spin_lock() does not disable preemption +which means that a preempting writer or reader would live lock. 
It's +therefore required to disable preemption explicitly. + +An alternative solution would be to replace i_dir_seq with a seqlock_t for +PREEMPT_RT, but that comes with its own set of problems due to arbitrary +lock nesting. A pure sequence count with an associated spinlock is not +possible because the locks held by the caller are not necessarily related. + +As the critical section is small, disabling preemption is a sensible +solution. + +Reported-by: Oleg.Karfich@wago.com +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220613140712.77932-2-bigeasy@linutronix.de +--- + fs/dcache.c | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -2563,7 +2563,15 @@ EXPORT_SYMBOL(d_rehash); + + static inline unsigned start_dir_add(struct inode *dir) + { +- ++ /* ++ * The caller holds a spinlock (dentry::d_lock). On !PREEMPT_RT ++ * kernels spin_lock() implicitly disables preemption, but not on ++ * PREEMPT_RT. So for RT it has to be done explicitly to protect ++ * the sequence count write side critical section against a reader ++ * or another writer preempting, which would result in a live lock. 
++ */ ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); + for (;;) { + unsigned n = dir->i_dir_seq; + if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) +@@ -2575,6 +2583,8 @@ static inline unsigned start_dir_add(str + static inline void end_dir_add(struct inode *dir, unsigned n) + { + smp_store_release(&dir->i_dir_seq, n + 2); ++ if (IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable(); + } + + static void d_wait_lookup(struct dentry *dentry) diff --git a/debian/patches-rt/0001-printk-rename-cpulock-functions.patch b/debian/patches-rt/0001-printk-rename-cpulock-functions.patch deleted file mode 100644 index 8c3c56132..000000000 --- a/debian/patches-rt/0001-printk-rename-cpulock-functions.patch +++ /dev/null @@ -1,295 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 20 Apr 2022 01:52:23 +0206 -Subject: [PATCH 01/18] printk: rename cpulock functions -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Since the printk cpulock is CPU-reentrant and since it is used -in all contexts, its usage must be carefully considered and -most likely will require programming locklessly. To avoid -mistaking the printk cpulock as a typical lock, rename it to -cpu_sync. The main functions then become: - - printk_cpu_sync_get_irqsave(flags); - printk_cpu_sync_put_irqrestore(flags); - -Add extra notes of caution in the function description to help -developers understand the requirements for correct usage. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220419234637.357112-2-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/printk.h | 54 ++++++++++++++++++++++--------------- - kernel/printk/printk.c | 71 ++++++++++++++++++++++++------------------------- - lib/dump_stack.c | 4 +- - lib/nmi_backtrace.c | 4 +- - 4 files changed, 73 insertions(+), 60 deletions(-) - ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -277,43 +277,55 @@ static inline void printk_trigger_flush( - #endif - - #ifdef CONFIG_SMP --extern int __printk_cpu_trylock(void); --extern void __printk_wait_on_cpu_lock(void); --extern void __printk_cpu_unlock(void); -+extern int __printk_cpu_sync_try_get(void); -+extern void __printk_cpu_sync_wait(void); -+extern void __printk_cpu_sync_put(void); - - /** -- * printk_cpu_lock_irqsave() - Acquire the printk cpu-reentrant spinning -- * lock and disable interrupts. -+ * printk_cpu_sync_get_irqsave() - Acquire the printk cpu-reentrant spinning -+ * lock and disable interrupts. - * @flags: Stack-allocated storage for saving local interrupt state, -- * to be passed to printk_cpu_unlock_irqrestore(). -+ * to be passed to printk_cpu_sync_put_irqrestore(). - * - * If the lock is owned by another CPU, spin until it becomes available. - * Interrupts are restored while spinning. -+ * -+ * CAUTION: This function must be used carefully. It does not behave like a -+ * typical lock. Here are important things to watch out for... -+ * -+ * * This function is reentrant on the same CPU. Therefore the calling -+ * code must not assume exclusive access to data if code accessing the -+ * data can run reentrant or within NMI context on the same CPU. 
-+ * -+ * * If there exists usage of this function from NMI context, it becomes -+ * unsafe to perform any type of locking or spinning to wait for other -+ * CPUs after calling this function from any context. This includes -+ * using spinlocks or any other busy-waiting synchronization methods. - */ --#define printk_cpu_lock_irqsave(flags) \ -- for (;;) { \ -- local_irq_save(flags); \ -- if (__printk_cpu_trylock()) \ -- break; \ -- local_irq_restore(flags); \ -- __printk_wait_on_cpu_lock(); \ -+#define printk_cpu_sync_get_irqsave(flags) \ -+ for (;;) { \ -+ local_irq_save(flags); \ -+ if (__printk_cpu_sync_try_get()) \ -+ break; \ -+ local_irq_restore(flags); \ -+ __printk_cpu_sync_wait(); \ - } - - /** -- * printk_cpu_unlock_irqrestore() - Release the printk cpu-reentrant spinning -- * lock and restore interrupts. -- * @flags: Caller's saved interrupt state, from printk_cpu_lock_irqsave(). -+ * printk_cpu_sync_put_irqrestore() - Release the printk cpu-reentrant spinning -+ * lock and restore interrupts. -+ * @flags: Caller's saved interrupt state, from printk_cpu_sync_get_irqsave(). 
- */ --#define printk_cpu_unlock_irqrestore(flags) \ -+#define printk_cpu_sync_put_irqrestore(flags) \ - do { \ -- __printk_cpu_unlock(); \ -+ __printk_cpu_sync_put(); \ - local_irq_restore(flags); \ -- } while (0) \ -+ } while (0) - - #else - --#define printk_cpu_lock_irqsave(flags) ((void)flags) --#define printk_cpu_unlock_irqrestore(flags) ((void)flags) -+#define printk_cpu_sync_get_irqsave(flags) ((void)flags) -+#define printk_cpu_sync_put_irqrestore(flags) ((void)flags) - - #endif /* CONFIG_SMP */ - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -3667,26 +3667,26 @@ EXPORT_SYMBOL_GPL(kmsg_dump_rewind); - #endif - - #ifdef CONFIG_SMP --static atomic_t printk_cpulock_owner = ATOMIC_INIT(-1); --static atomic_t printk_cpulock_nested = ATOMIC_INIT(0); -+static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1); -+static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0); - - /** -- * __printk_wait_on_cpu_lock() - Busy wait until the printk cpu-reentrant -- * spinning lock is not owned by any CPU. -+ * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant -+ * spinning lock is not owned by any CPU. - * - * Context: Any context. - */ --void __printk_wait_on_cpu_lock(void) -+void __printk_cpu_sync_wait(void) - { - do { - cpu_relax(); -- } while (atomic_read(&printk_cpulock_owner) != -1); -+ } while (atomic_read(&printk_cpu_sync_owner) != -1); - } --EXPORT_SYMBOL(__printk_wait_on_cpu_lock); -+EXPORT_SYMBOL(__printk_cpu_sync_wait); - - /** -- * __printk_cpu_trylock() - Try to acquire the printk cpu-reentrant -- * spinning lock. -+ * __printk_cpu_sync_try_get() - Try to acquire the printk cpu-reentrant -+ * spinning lock. - * - * If no processor has the lock, the calling processor takes the lock and - * becomes the owner. If the calling processor is already the owner of the -@@ -3695,7 +3695,7 @@ EXPORT_SYMBOL(__printk_wait_on_cpu_lock) - * Context: Any context. Expects interrupts to be disabled. - * Return: 1 on success, otherwise 0. 
- */ --int __printk_cpu_trylock(void) -+int __printk_cpu_sync_try_get(void) - { - int cpu; - int old; -@@ -3705,79 +3705,80 @@ int __printk_cpu_trylock(void) - /* - * Guarantee loads and stores from this CPU when it is the lock owner - * are _not_ visible to the previous lock owner. This pairs with -- * __printk_cpu_unlock:B. -+ * __printk_cpu_sync_put:B. - * - * Memory barrier involvement: - * -- * If __printk_cpu_trylock:A reads from __printk_cpu_unlock:B, then -- * __printk_cpu_unlock:A can never read from __printk_cpu_trylock:B. -+ * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, -+ * then __printk_cpu_sync_put:A can never read from -+ * __printk_cpu_sync_try_get:B. - * - * Relies on: - * -- * RELEASE from __printk_cpu_unlock:A to __printk_cpu_unlock:B -+ * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B - * of the previous CPU - * matching -- * ACQUIRE from __printk_cpu_trylock:A to __printk_cpu_trylock:B -- * of this CPU -+ * ACQUIRE from __printk_cpu_sync_try_get:A to -+ * __printk_cpu_sync_try_get:B of this CPU - */ -- old = atomic_cmpxchg_acquire(&printk_cpulock_owner, -1, -- cpu); /* LMM(__printk_cpu_trylock:A) */ -+ old = atomic_cmpxchg_acquire(&printk_cpu_sync_owner, -1, -+ cpu); /* LMM(__printk_cpu_sync_try_get:A) */ - if (old == -1) { - /* - * This CPU is now the owner and begins loading/storing -- * data: LMM(__printk_cpu_trylock:B) -+ * data: LMM(__printk_cpu_sync_try_get:B) - */ - return 1; - - } else if (old == cpu) { - /* This CPU is already the owner. */ -- atomic_inc(&printk_cpulock_nested); -+ atomic_inc(&printk_cpu_sync_nested); - return 1; - } - - return 0; - } --EXPORT_SYMBOL(__printk_cpu_trylock); -+EXPORT_SYMBOL(__printk_cpu_sync_try_get); - - /** -- * __printk_cpu_unlock() - Release the printk cpu-reentrant spinning lock. -+ * __printk_cpu_sync_put() - Release the printk cpu-reentrant spinning lock. - * - * The calling processor must be the owner of the lock. - * - * Context: Any context. 
Expects interrupts to be disabled. - */ --void __printk_cpu_unlock(void) -+void __printk_cpu_sync_put(void) - { -- if (atomic_read(&printk_cpulock_nested)) { -- atomic_dec(&printk_cpulock_nested); -+ if (atomic_read(&printk_cpu_sync_nested)) { -+ atomic_dec(&printk_cpu_sync_nested); - return; - } - - /* - * This CPU is finished loading/storing data: -- * LMM(__printk_cpu_unlock:A) -+ * LMM(__printk_cpu_sync_put:A) - */ - - /* - * Guarantee loads and stores from this CPU when it was the - * lock owner are visible to the next lock owner. This pairs -- * with __printk_cpu_trylock:A. -+ * with __printk_cpu_sync_try_get:A. - * - * Memory barrier involvement: - * -- * If __printk_cpu_trylock:A reads from __printk_cpu_unlock:B, -- * then __printk_cpu_trylock:B reads from __printk_cpu_unlock:A. -+ * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, -+ * then __printk_cpu_sync_try_get:B reads from __printk_cpu_sync_put:A. - * - * Relies on: - * -- * RELEASE from __printk_cpu_unlock:A to __printk_cpu_unlock:B -+ * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B - * of this CPU - * matching -- * ACQUIRE from __printk_cpu_trylock:A to __printk_cpu_trylock:B -- * of the next CPU -+ * ACQUIRE from __printk_cpu_sync_try_get:A to -+ * __printk_cpu_sync_try_get:B of the next CPU - */ -- atomic_set_release(&printk_cpulock_owner, -- -1); /* LMM(__printk_cpu_unlock:B) */ -+ atomic_set_release(&printk_cpu_sync_owner, -+ -1); /* LMM(__printk_cpu_sync_put:B) */ - } --EXPORT_SYMBOL(__printk_cpu_unlock); -+EXPORT_SYMBOL(__printk_cpu_sync_put); - #endif /* CONFIG_SMP */ ---- a/lib/dump_stack.c -+++ b/lib/dump_stack.c -@@ -102,9 +102,9 @@ asmlinkage __visible void dump_stack_lvl - * Permit this cpu to perform nested stack dumps while serialising - * against other CPUs - */ -- printk_cpu_lock_irqsave(flags); -+ printk_cpu_sync_get_irqsave(flags); - __dump_stack(log_lvl); -- printk_cpu_unlock_irqrestore(flags); -+ printk_cpu_sync_put_irqrestore(flags); - } - 
EXPORT_SYMBOL(dump_stack_lvl); - ---- a/lib/nmi_backtrace.c -+++ b/lib/nmi_backtrace.c -@@ -99,7 +99,7 @@ bool nmi_cpu_backtrace(struct pt_regs *r - * Allow nested NMI backtraces while serializing - * against other CPUs. - */ -- printk_cpu_lock_irqsave(flags); -+ printk_cpu_sync_get_irqsave(flags); - if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) { - pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n", - cpu, (void *)instruction_pointer(regs)); -@@ -110,7 +110,7 @@ bool nmi_cpu_backtrace(struct pt_regs *r - else - dump_stack(); - } -- printk_cpu_unlock_irqrestore(flags); -+ printk_cpu_sync_put_irqrestore(flags); - cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - return true; - } diff --git a/debian/patches-rt/0001-sched-Fix-missing-prototype-warnings.patch b/debian/patches-rt/0001-sched-Fix-missing-prototype-warnings.patch deleted file mode 100644 index e84d7c131..000000000 --- a/debian/patches-rt/0001-sched-Fix-missing-prototype-warnings.patch +++ /dev/null @@ -1,142 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 13 Apr 2022 15:31:02 +0200 -Subject: [PATCH 1/3] sched: Fix missing prototype warnings -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -A W=1 build emits more than a dozen missing prototype warnings related to -scheduler and scheduler specific includes. 
- -Reported-by: kernel test robot <lkp@intel.com> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220413133024.249118058@linutronix.de ---- - include/linux/sched.h | 2 ++ - kernel/sched/build_policy.c | 2 ++ - kernel/sched/build_utility.c | 1 + - kernel/sched/core.c | 3 +++ - kernel/sched/deadline.c | 2 -- - kernel/sched/fair.c | 1 + - kernel/sched/sched.h | 8 ++------ - kernel/sched/smp.h | 6 ++++++ - kernel/stop_machine.c | 2 -- - 9 files changed, 17 insertions(+), 10 deletions(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -2362,4 +2362,6 @@ static inline void sched_core_free(struc - static inline void sched_core_fork(struct task_struct *p) { } - #endif - -+extern void sched_set_stop_task(int cpu, struct task_struct *stop); -+ - #endif ---- a/kernel/sched/build_policy.c -+++ b/kernel/sched/build_policy.c -@@ -15,6 +15,7 @@ - /* Headers: */ - #include <linux/sched/clock.h> - #include <linux/sched/cputime.h> -+#include <linux/sched/hotplug.h> - #include <linux/sched/posix-timers.h> - #include <linux/sched/rt.h> - -@@ -31,6 +32,7 @@ - #include <uapi/linux/sched/types.h> - - #include "sched.h" -+#include "smp.h" - - #include "autogroup.h" - #include "stats.h" ---- a/kernel/sched/build_utility.c -+++ b/kernel/sched/build_utility.c -@@ -14,6 +14,7 @@ - #include <linux/sched/debug.h> - #include <linux/sched/isolation.h> - #include <linux/sched/loadavg.h> -+#include <linux/sched/nohz.h> - #include <linux/sched/mm.h> - #include <linux/sched/rseq_api.h> - #include <linux/sched/task_stack.h> ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -26,7 +26,10 @@ - #include <linux/topology.h> - #include <linux/sched/clock.h> - #include <linux/sched/cond_resched.h> -+#include <linux/sched/cputime.h> - #include <linux/sched/debug.h> -+#include <linux/sched/hotplug.h> -+#include <linux/sched/init.h> - #include <linux/sched/isolation.h> - #include 
<linux/sched/loadavg.h> - #include <linux/sched/mm.h> ---- a/kernel/sched/deadline.c -+++ b/kernel/sched/deadline.c -@@ -1220,8 +1220,6 @@ int dl_runtime_exceeded(struct sched_dl_ - return (dl_se->runtime <= 0); - } - --extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); -- - /* - * This function implements the GRUB accounting rule: - * according to the GRUB reclaiming algorithm, the runtime is ---- a/kernel/sched/fair.c -+++ b/kernel/sched/fair.c -@@ -36,6 +36,7 @@ - #include <linux/sched/cond_resched.h> - #include <linux/sched/cputime.h> - #include <linux/sched/isolation.h> -+#include <linux/sched/nohz.h> - - #include <linux/cpuidle.h> - #include <linux/interrupt.h> ---- a/kernel/sched/sched.h -+++ b/kernel/sched/sched.h -@@ -1827,12 +1827,7 @@ static inline void dirty_sched_domain_sy - #endif - - extern int sched_update_scaling(void); -- --extern void flush_smp_call_function_from_idle(void); -- --#else /* !CONFIG_SMP: */ --static inline void flush_smp_call_function_from_idle(void) { } --#endif -+#endif /* CONFIG_SMP */ - - #include "stats.h" - -@@ -2309,6 +2304,7 @@ extern void resched_cpu(int cpu); - - extern struct rt_bandwidth def_rt_bandwidth; - extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime); -+extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); - - extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime); - extern void init_dl_task_timer(struct sched_dl_entity *dl_se); ---- a/kernel/sched/smp.h -+++ b/kernel/sched/smp.h -@@ -7,3 +7,9 @@ - extern void sched_ttwu_pending(void *arg); - - extern void send_call_function_single_ipi(int cpu); -+ -+#ifdef CONFIG_SMP -+extern void flush_smp_call_function_from_idle(void); -+#else -+static inline void flush_smp_call_function_from_idle(void) { } -+#endif ---- a/kernel/stop_machine.c -+++ b/kernel/stop_machine.c -@@ -535,8 +535,6 @@ void stop_machine_park(int cpu) - kthread_park(stopper->thread); - } - --extern void sched_set_stop_task(int 
cpu, struct task_struct *stop); -- - static void cpu_stop_create(unsigned int cpu) - { - sched_set_stop_task(cpu, per_cpu(cpu_stopper.thread, cpu)); diff --git a/debian/patches-rt/0001-scsi-fcoe-Add-a-local_lock-to-fcoe_percpu.patch b/debian/patches-rt/0001-scsi-fcoe-Add-a-local_lock-to-fcoe_percpu.patch deleted file mode 100644 index 84d552159..000000000 --- a/debian/patches-rt/0001-scsi-fcoe-Add-a-local_lock-to-fcoe_percpu.patch +++ /dev/null @@ -1,70 +0,0 @@ -From: Davidlohr Bueso <dave@stgolabs.net> -Date: Fri, 6 May 2022 12:57:55 +0200 -Subject: [PATCH 1/4] scsi: fcoe: Add a local_lock to fcoe_percpu -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -fcoe_get_paged_crc_eof() relies on the caller having preemption -disabled to ensure the per-CPU fcoe_percpu context remains valid -throughout the call. This is done by either holding spinlocks -(such as bnx2fc_global_lock or qedf_global_lock) or the get_cpu() -from fcoe_alloc_paged_crc_eof(). This last one breaks PREEMPT_RT -semantics as there can be memory allocation and end up sleeping -in atomic contexts. - -Introduce a local_lock_t to struct fcoe_percpu that will keep the -non-RT case the same, mapping to preempt_disable/enable, while -RT will use a per-CPU spinlock allowing the region to be preemptible -but still maintain CPU locality. The other users of fcoe_percpu -are already safe in this regard and do not require local_lock()ing. 
- -Signed-off-by: Davidlohr Bueso <dbueso@suse.de> -Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20211117025956.79616-3-dave@stgolabs.net -Link: https://lore.kernel.org/r/20220506105758.283887-2-bigeasy@linutronix.de ---- - drivers/scsi/fcoe/fcoe.c | 6 ++++-- - include/scsi/libfcoe.h | 2 ++ - 2 files changed, 6 insertions(+), 2 deletions(-) - ---- a/drivers/scsi/fcoe/fcoe.c -+++ b/drivers/scsi/fcoe/fcoe.c -@@ -1453,9 +1453,10 @@ static int fcoe_alloc_paged_crc_eof(stru - struct fcoe_percpu_s *fps; - int rc; - -- fps = &get_cpu_var(fcoe_percpu); -+ local_lock(&fcoe_percpu.lock); -+ fps = this_cpu_ptr(&fcoe_percpu); - rc = fcoe_get_paged_crc_eof(skb, tlen, fps); -- put_cpu_var(fcoe_percpu); -+ local_unlock(&fcoe_percpu.lock); - - return rc; - } -@@ -2488,6 +2489,7 @@ static int __init fcoe_init(void) - p = per_cpu_ptr(&fcoe_percpu, cpu); - INIT_WORK(&p->work, fcoe_receive_work); - skb_queue_head_init(&p->fcoe_rx_list); -+ local_lock_init(&p->lock); - } - - /* Setup link change notification */ ---- a/include/scsi/libfcoe.h -+++ b/include/scsi/libfcoe.h -@@ -14,6 +14,7 @@ - #include <linux/netdevice.h> - #include <linux/skbuff.h> - #include <linux/workqueue.h> -+#include <linux/local_lock.h> - #include <linux/random.h> - #include <scsi/fc/fc_fcoe.h> - #include <scsi/libfc.h> -@@ -326,6 +327,7 @@ struct fcoe_percpu_s { - struct sk_buff_head fcoe_rx_list; - struct page *crc_eof_page; - int crc_eof_offset; -+ local_lock_t lock; - }; - - /** diff --git a/debian/patches-rt/0001-signal-Rename-send_signal-send_signal_locked.patch b/debian/patches-rt/0001-signal-Rename-send_signal-send_signal_locked.patch deleted file mode 100644 index bd609e6b2..000000000 --- a/debian/patches-rt/0001-signal-Rename-send_signal-send_signal_locked.patch +++ /dev/null @@ -1,124 +0,0 @@ -From: "Eric W. 
Biederman" <ebiederm@xmission.com> -Date: Thu, 5 May 2022 13:26:34 -0500 -Subject: [PATCH 01/12] signal: Rename send_signal send_signal_locked -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Rename send_signal and __send_signal to send_signal_locked and -__send_signal_locked to make send_signal usable outside of -signal.c. - -Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220505182645.497868-1-ebiederm@xmission.com ---- - include/linux/signal.h | 2 ++ - kernel/signal.c | 24 ++++++++++++------------ - 2 files changed, 14 insertions(+), 12 deletions(-) - ---- a/include/linux/signal.h -+++ b/include/linux/signal.h -@@ -283,6 +283,8 @@ extern int do_send_sig_info(int sig, str - extern int group_send_sig_info(int sig, struct kernel_siginfo *info, - struct task_struct *p, enum pid_type type); - extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *); -+extern int send_signal_locked(int sig, struct kernel_siginfo *info, -+ struct task_struct *p, enum pid_type type); - extern int sigprocmask(int, sigset_t *, sigset_t *); - extern void set_current_blocked(sigset_t *); - extern void __set_current_blocked(const sigset_t *); ---- a/kernel/signal.c -+++ b/kernel/signal.c -@@ -1071,8 +1071,8 @@ static inline bool legacy_queue(struct s - return (sig < SIGRTMIN) && sigismember(&signals->signal, sig); - } - --static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t, -- enum pid_type type, bool force) -+static int __send_signal_locked(int sig, struct kernel_siginfo *info, -+ struct task_struct *t, enum pid_type type, bool force) - { - struct sigpending *pending; - struct sigqueue *q; -@@ -1212,8 +1212,8 @@ static inline bool has_si_pid_and_uid(st - return ret; - } - --static int send_signal(int sig, struct kernel_siginfo *info, struct task_struct *t, -- enum 
pid_type type) -+int send_signal_locked(int sig, struct kernel_siginfo *info, -+ struct task_struct *t, enum pid_type type) - { - /* Should SIGKILL or SIGSTOP be received by a pid namespace init? */ - bool force = false; -@@ -1245,7 +1245,7 @@ static int send_signal(int sig, struct k - force = true; - } - } -- return __send_signal(sig, info, t, type, force); -+ return __send_signal_locked(sig, info, t, type, force); - } - - static void print_fatal_signal(int signr) -@@ -1284,7 +1284,7 @@ static int __init setup_print_fatal_sign - int - __group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p) - { -- return send_signal(sig, info, p, PIDTYPE_TGID); -+ return send_signal_locked(sig, info, p, PIDTYPE_TGID); - } - - int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, -@@ -1294,7 +1294,7 @@ int do_send_sig_info(int sig, struct ker - int ret = -ESRCH; - - if (lock_task_sighand(p, &flags)) { -- ret = send_signal(sig, info, p, type); -+ ret = send_signal_locked(sig, info, p, type); - unlock_task_sighand(p, &flags); - } - -@@ -1347,7 +1347,7 @@ force_sig_info_to_task(struct kernel_sig - if (action->sa.sa_handler == SIG_DFL && - (!t->ptrace || (handler == HANDLER_EXIT))) - t->signal->flags &= ~SIGNAL_UNKILLABLE; -- ret = send_signal(sig, info, t, PIDTYPE_PID); -+ ret = send_signal_locked(sig, info, t, PIDTYPE_PID); - spin_unlock_irqrestore(&t->sighand->siglock, flags); - - return ret; -@@ -1567,7 +1567,7 @@ int kill_pid_usb_asyncio(int sig, int er - - if (sig) { - if (lock_task_sighand(p, &flags)) { -- ret = __send_signal(sig, &info, p, PIDTYPE_TGID, false); -+ ret = __send_signal_locked(sig, &info, p, PIDTYPE_TGID, false); - unlock_task_sighand(p, &flags); - } else - ret = -ESRCH; -@@ -2103,7 +2103,7 @@ bool do_notify_parent(struct task_struct - * parent's namespaces. 
- */ - if (valid_signal(sig) && sig) -- __send_signal(sig, &info, tsk->parent, PIDTYPE_TGID, false); -+ __send_signal_locked(sig, &info, tsk->parent, PIDTYPE_TGID, false); - __wake_up_parent(tsk, tsk->parent); - spin_unlock_irqrestore(&psig->siglock, flags); - -@@ -2601,7 +2601,7 @@ static int ptrace_signal(int signr, kern - /* If the (new) signal is now blocked, requeue it. */ - if (sigismember(¤t->blocked, signr) || - fatal_signal_pending(current)) { -- send_signal(signr, info, current, type); -+ send_signal_locked(signr, info, current, type); - signr = 0; - } - -@@ -4793,7 +4793,7 @@ void kdb_send_sig(struct task_struct *t, - "the deadlock.\n"); - return; - } -- ret = send_signal(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); -+ ret = send_signal_locked(sig, SEND_SIG_PRIV, t, PIDTYPE_PID); - spin_unlock(&t->sighand->siglock); - if (ret) - kdb_printf("Fail to deliver Signal %d to process %d.\n", diff --git a/debian/patches-rt/0002-fs-dcache-Split-__d_lookup_done.patch b/debian/patches-rt/0002-fs-dcache-Split-__d_lookup_done.patch new file mode 100644 index 000000000..44e79dbd9 --- /dev/null +++ b/debian/patches-rt/0002-fs-dcache-Split-__d_lookup_done.patch @@ -0,0 +1,99 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Sun, 12 Jun 2022 16:27:29 +0200 +Subject: [PATCH 2/4] fs/dcache: Split __d_lookup_done() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz + +__d_lookup_done() wakes waiters on dentry::d_wait inside a preemption +disabled region. This violates the PREEMPT_RT constraints as the wake up +acquires wait_queue_head::lock which is a "sleeping" spinlock on RT. + +As a first step to solve this, move the wake up outside of the +hlist_bl_lock() held section. + +This is safe because: + + 1) The whole sequence including the wake up is protected by dentry::lock. + + 2) The waitqueue head is allocated by the caller on stack and can't go + away until the whole callchain completes. 
+ + 3) If a queued waiter is woken by a spurious wake up, then it is blocked + on dentry:lock before it can observe DCACHE_PAR_LOOKUP cleared and + return from d_wait_lookup(). + + As the wake up is inside the dentry:lock held region it's guaranteed + that the waiters waitq is dequeued from the waitqueue head before the + waiter returns. + + Moving the wake up past the unlock of dentry::lock would allow the + waiter to return with the on stack waitq still enqueued due to a + spurious wake up. + + 4) New waiters have to acquire dentry::lock before checking whether the + DCACHE_PAR_LOOKUP flag is set. + +Let __d_lookup_unhash(): + + 1) Lock the lookup hash and clear DCACHE_PAR_LOOKUP + 2) Unhash the dentry + 3) Retrieve and clear dentry::d_wait + 4) Unlock the hash and return the retrieved waitqueue head pointer + 5) Let the caller handle the wake up. + +This does not yet solve the PREEMPT_RT problem completely because +preemption is still disabled due to i_dir_seq being held for write. This +will be addressed in subsequent steps. + +An alternative solution would be to switch the waitqueue to a simple +waitqueue, but aside of Linus not being a fan of them, moving the wake up +closer to the place where dentry::lock is unlocked reduces lock contention +time for the woken up waiter. 
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220613140712.77932-3-bigeasy@linutronix.de +--- + fs/dcache.c | 23 +++++++++++++++++++---- + 1 file changed, 19 insertions(+), 4 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -2711,18 +2711,33 @@ struct dentry *d_alloc_parallel(struct d + } + EXPORT_SYMBOL(d_alloc_parallel); + +-void __d_lookup_done(struct dentry *dentry) ++/* ++ * - Unhash the dentry ++ * - Retrieve and clear the waitqueue head in dentry ++ * - Return the waitqueue head ++ */ ++static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry) + { +- struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent, +- dentry->d_name.hash); ++ wait_queue_head_t *d_wait; ++ struct hlist_bl_head *b; ++ ++ lockdep_assert_held(&dentry->d_lock); ++ ++ b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash); + hlist_bl_lock(b); + dentry->d_flags &= ~DCACHE_PAR_LOOKUP; + __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); +- wake_up_all(dentry->d_wait); ++ d_wait = dentry->d_wait; + dentry->d_wait = NULL; + hlist_bl_unlock(b); + INIT_HLIST_NODE(&dentry->d_u.d_alias); + INIT_LIST_HEAD(&dentry->d_lru); ++ return d_wait; ++} ++ ++void __d_lookup_done(struct dentry *dentry) ++{ ++ wake_up_all(__d_lookup_unhash(dentry)); + } + EXPORT_SYMBOL(__d_lookup_done); + diff --git a/debian/patches-rt/0002-printk-cpu-sync-always-disable-interrupts.patch b/debian/patches-rt/0002-printk-cpu-sync-always-disable-interrupts.patch deleted file mode 100644 index 7a1618ce6..000000000 --- a/debian/patches-rt/0002-printk-cpu-sync-always-disable-interrupts.patch +++ /dev/null @@ -1,54 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 20 Apr 2022 01:52:24 +0206 -Subject: [PATCH 02/18] printk: cpu sync always disable interrupts -Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -The CPU sync functions are a NOP for !CONFIG_SMP. But for -!CONFIG_SMP they still need to disable interrupts in order to -preserve context within the CPU sync sections. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220419234637.357112-3-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/printk.h | 18 +++++++++--------- - 1 file changed, 9 insertions(+), 9 deletions(-) - ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -281,9 +281,16 @@ extern int __printk_cpu_sync_try_get(voi - extern void __printk_cpu_sync_wait(void); - extern void __printk_cpu_sync_put(void); - -+#else -+ -+#define __printk_cpu_sync_try_get() true -+#define __printk_cpu_sync_wait() -+#define __printk_cpu_sync_put() -+#endif /* CONFIG_SMP */ -+ - /** -- * printk_cpu_sync_get_irqsave() - Acquire the printk cpu-reentrant spinning -- * lock and disable interrupts. -+ * printk_cpu_sync_get_irqsave() - Disable interrupts and acquire the printk -+ * cpu-reentrant spinning lock. - * @flags: Stack-allocated storage for saving local interrupt state, - * to be passed to printk_cpu_sync_put_irqrestore(). 
- * -@@ -322,13 +329,6 @@ extern void __printk_cpu_sync_put(void); - local_irq_restore(flags); \ - } while (0) - --#else -- --#define printk_cpu_sync_get_irqsave(flags) ((void)flags) --#define printk_cpu_sync_put_irqrestore(flags) ((void)flags) -- --#endif /* CONFIG_SMP */ -- - extern int kptr_restrict; - - /** diff --git a/debian/patches-rt/0002-scsi-fcoe-Use-per-CPU-API-to-update-per-CPU-statisti.patch b/debian/patches-rt/0002-scsi-fcoe-Use-per-CPU-API-to-update-per-CPU-statisti.patch deleted file mode 100644 index 113666097..000000000 --- a/debian/patches-rt/0002-scsi-fcoe-Use-per-CPU-API-to-update-per-CPU-statisti.patch +++ /dev/null @@ -1,517 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 6 May 2022 12:57:56 +0200 -Subject: [PATCH 2/4] scsi: fcoe: Use per-CPU API to update per-CPU statistics. -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -The per-CPU statistics (struct fc_stats) is updated by getting a stable -per-CPU pointer via get_cpu() + per_cpu_ptr() and then performing the -increment. This can be optimized by using this_cpu_*() which will do -whatever is needed on the architecture to perform the update safe and -efficient. -The read out of the individual value (fc_get_host_stats()) should be -done by using READ_ONCE() instead of a plain-C access. The difference is -that READ_ONCE() will always perform a single access while the plain-C -access can be splitt by the compiler into two loads if it appears -beneficial. -The usage of u64 has the side-effect that it is also 64bit wide on 32bit -architectures and the read is always split into two loads. The can lead -to strange values if the read happens during an update which alters both -32bit parts of the 64bit value. This can be circumvanted by either using -a 32bit variables on 32bit architecures or extending the statistics with -a sequence counter. - -Use this_cpu_*() API to update the statistics and READ_ONCE() to read -it. 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220506105758.283887-3-bigeasy@linutronix.de ---- - drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 22 ++++++---------------- - drivers/scsi/bnx2fc/bnx2fc_io.c | 13 +++++-------- - drivers/scsi/fcoe/fcoe.c | 36 ++++++++++++------------------------ - drivers/scsi/fcoe/fcoe_ctlr.c | 26 +++++++++----------------- - drivers/scsi/fcoe/fcoe_transport.c | 6 +++--- - drivers/scsi/libfc/fc_fcp.c | 29 ++++++++++------------------- - drivers/scsi/libfc/fc_lport.c | 30 +++++++++++++++--------------- - drivers/scsi/qedf/qedf_main.c | 7 ++----- - 8 files changed, 62 insertions(+), 107 deletions(-) - ---- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c -+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c -@@ -273,7 +273,6 @@ static int bnx2fc_xmit(struct fc_lport * - struct fcoe_port *port; - struct fcoe_hdr *hp; - struct bnx2fc_rport *tgt; -- struct fc_stats *stats; - u8 sof, eof; - u32 crc; - unsigned int hlen, tlen, elen; -@@ -399,10 +398,8 @@ static int bnx2fc_xmit(struct fc_lport * - } - - /*update tx stats */ -- stats = per_cpu_ptr(lport->stats, get_cpu()); -- stats->TxFrames++; -- stats->TxWords += wlen; -- put_cpu(); -+ this_cpu_inc(lport->stats->TxFrames); -+ this_cpu_add(lport->stats->TxWords, wlen); - - /* send down to lld */ - fr_dev(fp) = lport; -@@ -512,7 +509,6 @@ static void bnx2fc_recv_frame(struct sk_ - u32 fr_len, fr_crc; - struct fc_lport *lport; - struct fcoe_rcv_info *fr; -- struct fc_stats *stats; - struct fc_frame_header *fh; - struct fcoe_crc_eof crc_eof; - struct fc_frame *fp; -@@ -543,10 +539,8 @@ static void bnx2fc_recv_frame(struct sk_ - skb_pull(skb, sizeof(struct fcoe_hdr)); - fr_len = skb->len - sizeof(struct fcoe_crc_eof); - -- stats = per_cpu_ptr(lport->stats, get_cpu()); -- stats->RxFrames++; -- stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; -- put_cpu(); -+ this_cpu_inc(lport->stats->RxFrames); -+ this_cpu_add(lport->stats->RxWords, fr_len / FCOE_WORD_TO_BYTE); - - fp = (struct 
fc_frame *)skb; - fc_frame_init(fp); -@@ -633,9 +627,7 @@ static void bnx2fc_recv_frame(struct sk_ - fr_crc = le32_to_cpu(fr_crc(fp)); - - if (unlikely(fr_crc != ~crc32(~0, skb->data, fr_len))) { -- stats = per_cpu_ptr(lport->stats, get_cpu()); -- crc_err = (stats->InvalidCRCCount++); -- put_cpu(); -+ crc_err = this_cpu_inc_return(lport->stats->InvalidCRCCount); - if (crc_err < 5) - printk(KERN_WARNING PFX "dropping frame with " - "CRC error\n"); -@@ -964,9 +956,7 @@ static void bnx2fc_indicate_netevent(voi - mutex_unlock(&lport->lp_mutex); - fc_host_port_type(lport->host) = - FC_PORTTYPE_UNKNOWN; -- per_cpu_ptr(lport->stats, -- get_cpu())->LinkFailureCount++; -- put_cpu(); -+ this_cpu_inc(lport->stats->LinkFailureCount); - fcoe_clean_pending_queue(lport); - wait_for_upload = 1; - } ---- a/drivers/scsi/bnx2fc/bnx2fc_io.c -+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c -@@ -2032,7 +2032,6 @@ int bnx2fc_post_io_req(struct bnx2fc_rpo - struct bnx2fc_interface *interface = port->priv; - struct bnx2fc_hba *hba = interface->hba; - struct fc_lport *lport = port->lport; -- struct fc_stats *stats; - int task_idx, index; - u16 xid; - -@@ -2045,20 +2044,18 @@ int bnx2fc_post_io_req(struct bnx2fc_rpo - io_req->data_xfer_len = scsi_bufflen(sc_cmd); - bnx2fc_priv(sc_cmd)->io_req = io_req; - -- stats = per_cpu_ptr(lport->stats, get_cpu()); - if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) { - io_req->io_req_flags = BNX2FC_READ; -- stats->InputRequests++; -- stats->InputBytes += io_req->data_xfer_len; -+ this_cpu_inc(lport->stats->InputRequests); -+ this_cpu_add(lport->stats->InputBytes, io_req->data_xfer_len); - } else if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) { - io_req->io_req_flags = BNX2FC_WRITE; -- stats->OutputRequests++; -- stats->OutputBytes += io_req->data_xfer_len; -+ this_cpu_inc(lport->stats->OutputRequests); -+ this_cpu_add(lport->stats->OutputBytes, io_req->data_xfer_len); - } else { - io_req->io_req_flags = 0; -- stats->ControlRequests++; -+ 
this_cpu_inc(lport->stats->ControlRequests); - } -- put_cpu(); - - xid = io_req->xid; - ---- a/drivers/scsi/fcoe/fcoe.c -+++ b/drivers/scsi/fcoe/fcoe.c -@@ -1434,8 +1434,7 @@ static int fcoe_rcv(struct sk_buff *skb, - - return NET_RX_SUCCESS; - err: -- per_cpu_ptr(lport->stats, get_cpu())->ErrorFrames++; -- put_cpu(); -+ this_cpu_inc(lport->stats->ErrorFrames); - err2: - kfree_skb(skb); - return NET_RX_DROP; -@@ -1475,7 +1474,6 @@ static int fcoe_xmit(struct fc_lport *lp - struct ethhdr *eh; - struct fcoe_crc_eof *cp; - struct sk_buff *skb; -- struct fc_stats *stats; - struct fc_frame_header *fh; - unsigned int hlen; /* header length implies the version */ - unsigned int tlen; /* trailer length */ -@@ -1586,10 +1584,8 @@ static int fcoe_xmit(struct fc_lport *lp - skb_shinfo(skb)->gso_size = 0; - } - /* update tx stats: regardless if LLD fails */ -- stats = per_cpu_ptr(lport->stats, get_cpu()); -- stats->TxFrames++; -- stats->TxWords += wlen; -- put_cpu(); -+ this_cpu_inc(lport->stats->TxFrames); -+ this_cpu_add(lport->stats->TxWords, wlen); - - /* send down to lld */ - fr_dev(fp) = lport; -@@ -1611,7 +1607,6 @@ static inline int fcoe_filter_frames(str - struct fcoe_interface *fcoe; - struct fc_frame_header *fh; - struct sk_buff *skb = (struct sk_buff *)fp; -- struct fc_stats *stats; - - /* - * We only check CRC if no offload is available and if it is -@@ -1641,11 +1636,8 @@ static inline int fcoe_filter_frames(str - return 0; - } - -- stats = per_cpu_ptr(lport->stats, get_cpu()); -- stats->InvalidCRCCount++; -- if (stats->InvalidCRCCount < 5) -+ if (this_cpu_inc_return(lport->stats->InvalidCRCCount) < 5) - printk(KERN_WARNING "fcoe: dropping frame with CRC error\n"); -- put_cpu(); - return -EINVAL; - } - -@@ -1658,7 +1650,6 @@ static void fcoe_recv_frame(struct sk_bu - u32 fr_len; - struct fc_lport *lport; - struct fcoe_rcv_info *fr; -- struct fc_stats *stats; - struct fcoe_crc_eof crc_eof; - struct fc_frame *fp; - struct fcoe_hdr *hp; -@@ -1686,9 +1677,11 @@ 
static void fcoe_recv_frame(struct sk_bu - */ - hp = (struct fcoe_hdr *) skb_network_header(skb); - -- stats = per_cpu_ptr(lport->stats, get_cpu()); - if (unlikely(FC_FCOE_DECAPS_VER(hp) != FC_FCOE_VER)) { -- if (stats->ErrorFrames < 5) -+ struct fc_stats *stats; -+ -+ stats = per_cpu_ptr(lport->stats, raw_smp_processor_id()); -+ if (READ_ONCE(stats->ErrorFrames) < 5) - printk(KERN_WARNING "fcoe: FCoE version " - "mismatch: The frame has " - "version %x, but the " -@@ -1701,8 +1694,8 @@ static void fcoe_recv_frame(struct sk_bu - skb_pull(skb, sizeof(struct fcoe_hdr)); - fr_len = skb->len - sizeof(struct fcoe_crc_eof); - -- stats->RxFrames++; -- stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; -+ this_cpu_inc(lport->stats->RxFrames); -+ this_cpu_add(lport->stats->RxWords, fr_len / FCOE_WORD_TO_BYTE); - - fp = (struct fc_frame *)skb; - fc_frame_init(fp); -@@ -1718,13 +1711,11 @@ static void fcoe_recv_frame(struct sk_bu - goto drop; - - if (!fcoe_filter_frames(lport, fp)) { -- put_cpu(); - fc_exch_recv(lport, fp); - return; - } - drop: -- stats->ErrorFrames++; -- put_cpu(); -+ this_cpu_inc(lport->stats->ErrorFrames); - kfree_skb(skb); - } - -@@ -1848,7 +1839,6 @@ static int fcoe_device_notification(stru - struct net_device *netdev = netdev_notifier_info_to_dev(ptr); - struct fcoe_ctlr *ctlr; - struct fcoe_interface *fcoe; -- struct fc_stats *stats; - u32 link_possible = 1; - u32 mfs; - int rc = NOTIFY_OK; -@@ -1922,9 +1912,7 @@ static int fcoe_device_notification(stru - break; - case FCOE_CTLR_ENABLED: - case FCOE_CTLR_UNUSED: -- stats = per_cpu_ptr(lport->stats, get_cpu()); -- stats->LinkFailureCount++; -- put_cpu(); -+ this_cpu_inc(lport->stats->LinkFailureCount); - fcoe_clean_pending_queue(lport); - } - } ---- a/drivers/scsi/fcoe/fcoe_ctlr.c -+++ b/drivers/scsi/fcoe/fcoe_ctlr.c -@@ -824,22 +824,21 @@ static unsigned long fcoe_ctlr_age_fcfs( - unsigned long deadline; - unsigned long sel_time = 0; - struct list_head del_list; -- struct fc_stats *stats; - - 
INIT_LIST_HEAD(&del_list); - -- stats = per_cpu_ptr(fip->lp->stats, get_cpu()); -- - list_for_each_entry_safe(fcf, next, &fip->fcfs, list) { - deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2; - if (fip->sel_fcf == fcf) { - if (time_after(jiffies, deadline)) { -- stats->MissDiscAdvCount++; -+ u64 miss_cnt; -+ -+ miss_cnt = this_cpu_inc_return(fip->lp->stats->MissDiscAdvCount); - printk(KERN_INFO "libfcoe: host%d: " - "Missing Discovery Advertisement " - "for fab %16.16llx count %lld\n", - fip->lp->host->host_no, fcf->fabric_name, -- stats->MissDiscAdvCount); -+ miss_cnt); - } else if (time_after(next_timer, deadline)) - next_timer = deadline; - } -@@ -855,7 +854,7 @@ static unsigned long fcoe_ctlr_age_fcfs( - */ - list_del(&fcf->list); - list_add(&fcf->list, &del_list); -- stats->VLinkFailureCount++; -+ this_cpu_inc(fip->lp->stats->VLinkFailureCount); - } else { - if (time_after(next_timer, deadline)) - next_timer = deadline; -@@ -864,7 +863,6 @@ static unsigned long fcoe_ctlr_age_fcfs( - sel_time = fcf->time; - } - } -- put_cpu(); - - list_for_each_entry_safe(fcf, next, &del_list, list) { - /* Removes fcf from current list */ -@@ -1142,7 +1140,6 @@ static void fcoe_ctlr_recv_els(struct fc - struct fip_desc *desc; - struct fip_encaps *els; - struct fcoe_fcf *sel; -- struct fc_stats *stats; - enum fip_desc_type els_dtype = 0; - u8 els_op; - u8 sub; -@@ -1286,10 +1283,8 @@ static void fcoe_ctlr_recv_els(struct fc - fr_dev(fp) = lport; - fr_encaps(fp) = els_dtype; - -- stats = per_cpu_ptr(lport->stats, get_cpu()); -- stats->RxFrames++; -- stats->RxWords += skb->len / FIP_BPW; -- put_cpu(); -+ this_cpu_inc(lport->stats->RxFrames); -+ this_cpu_add(lport->stats->RxWords, skb->len / FIP_BPW); - - fc_exch_recv(lport, fp); - return; -@@ -1427,9 +1422,7 @@ static void fcoe_ctlr_recv_clr_vlink(str - ntoh24(vp->fd_fc_id)); - if (vn_port && (vn_port == lport)) { - mutex_lock(&fip->ctlr_mutex); -- per_cpu_ptr(lport->stats, -- get_cpu())->VLinkFailureCount++; -- 
put_cpu(); -+ this_cpu_inc(lport->stats->VLinkFailureCount); - fcoe_ctlr_reset(fip); - mutex_unlock(&fip->ctlr_mutex); - } -@@ -1457,8 +1450,7 @@ static void fcoe_ctlr_recv_clr_vlink(str - * followed by physical port - */ - mutex_lock(&fip->ctlr_mutex); -- per_cpu_ptr(lport->stats, get_cpu())->VLinkFailureCount++; -- put_cpu(); -+ this_cpu_inc(lport->stats->VLinkFailureCount); - fcoe_ctlr_reset(fip); - mutex_unlock(&fip->ctlr_mutex); - ---- a/drivers/scsi/fcoe/fcoe_transport.c -+++ b/drivers/scsi/fcoe/fcoe_transport.c -@@ -183,9 +183,9 @@ void __fcoe_get_lesb(struct fc_lport *lp - memset(lesb, 0, sizeof(*lesb)); - for_each_possible_cpu(cpu) { - stats = per_cpu_ptr(lport->stats, cpu); -- lfc += stats->LinkFailureCount; -- vlfc += stats->VLinkFailureCount; -- mdac += stats->MissDiscAdvCount; -+ lfc += READ_ONCE(stats->LinkFailureCount); -+ vlfc += READ_ONCE(stats->VLinkFailureCount); -+ mdac += READ_ONCE(stats->MissDiscAdvCount); - } - lesb->lesb_link_fail = htonl(lfc); - lesb->lesb_vlink_fail = htonl(vlfc); ---- a/drivers/scsi/libfc/fc_fcp.c -+++ b/drivers/scsi/libfc/fc_fcp.c -@@ -143,8 +143,7 @@ static struct fc_fcp_pkt *fc_fcp_pkt_all - INIT_LIST_HEAD(&fsp->list); - spin_lock_init(&fsp->scsi_pkt_lock); - } else { -- per_cpu_ptr(lport->stats, get_cpu())->FcpPktAllocFails++; -- put_cpu(); -+ this_cpu_inc(lport->stats->FcpPktAllocFails); - } - return fsp; - } -@@ -266,8 +265,7 @@ static int fc_fcp_send_abort(struct fc_f - if (!fsp->seq_ptr) - return -EINVAL; - -- per_cpu_ptr(fsp->lp->stats, get_cpu())->FcpPktAborts++; -- put_cpu(); -+ this_cpu_inc(fsp->lp->stats->FcpPktAborts); - - fsp->state |= FC_SRB_ABORT_PENDING; - rc = fc_seq_exch_abort(fsp->seq_ptr, 0); -@@ -436,8 +434,7 @@ static inline struct fc_frame *fc_fcp_fr - if (likely(fp)) - return fp; - -- per_cpu_ptr(lport->stats, get_cpu())->FcpFrameAllocFails++; -- put_cpu(); -+ this_cpu_inc(lport->stats->FcpFrameAllocFails); - /* error case */ - fc_fcp_can_queue_ramp_down(lport); - shost_printk(KERN_ERR, 
lport->host, -@@ -471,7 +468,6 @@ static void fc_fcp_recv_data(struct fc_f - { - struct scsi_cmnd *sc = fsp->cmd; - struct fc_lport *lport = fsp->lp; -- struct fc_stats *stats; - struct fc_frame_header *fh; - size_t start_offset; - size_t offset; -@@ -533,14 +529,12 @@ static void fc_fcp_recv_data(struct fc_f - - if (~crc != le32_to_cpu(fr_crc(fp))) { - crc_err: -- stats = per_cpu_ptr(lport->stats, get_cpu()); -- stats->ErrorFrames++; -+ this_cpu_inc(lport->stats->ErrorFrames); - /* per cpu count, not total count, but OK for limit */ -- if (stats->InvalidCRCCount++ < FC_MAX_ERROR_CNT) -+ if (this_cpu_inc_return(lport->stats->InvalidCRCCount) < FC_MAX_ERROR_CNT) - printk(KERN_WARNING "libfc: CRC error on data " - "frame for port (%6.6x)\n", - lport->port_id); -- put_cpu(); - /* - * Assume the frame is total garbage. - * We may have copied it over the good part -@@ -1861,7 +1855,6 @@ int fc_queuecommand(struct Scsi_Host *sh - struct fc_fcp_pkt *fsp; - int rval; - int rc = 0; -- struct fc_stats *stats; - - rval = fc_remote_port_chkready(rport); - if (rval) { -@@ -1913,20 +1906,18 @@ int fc_queuecommand(struct Scsi_Host *sh - /* - * setup the data direction - */ -- stats = per_cpu_ptr(lport->stats, get_cpu()); - if (sc_cmd->sc_data_direction == DMA_FROM_DEVICE) { - fsp->req_flags = FC_SRB_READ; -- stats->InputRequests++; -- stats->InputBytes += fsp->data_len; -+ this_cpu_inc(lport->stats->InputRequests); -+ this_cpu_add(lport->stats->InputBytes, fsp->data_len); - } else if (sc_cmd->sc_data_direction == DMA_TO_DEVICE) { - fsp->req_flags = FC_SRB_WRITE; -- stats->OutputRequests++; -- stats->OutputBytes += fsp->data_len; -+ this_cpu_inc(lport->stats->OutputRequests); -+ this_cpu_add(lport->stats->OutputBytes, fsp->data_len); - } else { - fsp->req_flags = 0; -- stats->ControlRequests++; -+ this_cpu_inc(lport->stats->ControlRequests); - } -- put_cpu(); - - /* - * send it to the lower layer ---- a/drivers/scsi/libfc/fc_lport.c -+++ b/drivers/scsi/libfc/fc_lport.c -@@ -308,21 
+308,21 @@ struct fc_host_statistics *fc_get_host_s - - stats = per_cpu_ptr(lport->stats, cpu); - -- fc_stats->tx_frames += stats->TxFrames; -- fc_stats->tx_words += stats->TxWords; -- fc_stats->rx_frames += stats->RxFrames; -- fc_stats->rx_words += stats->RxWords; -- fc_stats->error_frames += stats->ErrorFrames; -- fc_stats->invalid_crc_count += stats->InvalidCRCCount; -- fc_stats->fcp_input_requests += stats->InputRequests; -- fc_stats->fcp_output_requests += stats->OutputRequests; -- fc_stats->fcp_control_requests += stats->ControlRequests; -- fcp_in_bytes += stats->InputBytes; -- fcp_out_bytes += stats->OutputBytes; -- fc_stats->fcp_packet_alloc_failures += stats->FcpPktAllocFails; -- fc_stats->fcp_packet_aborts += stats->FcpPktAborts; -- fc_stats->fcp_frame_alloc_failures += stats->FcpFrameAllocFails; -- fc_stats->link_failure_count += stats->LinkFailureCount; -+ fc_stats->tx_frames += READ_ONCE(stats->TxFrames); -+ fc_stats->tx_words += READ_ONCE(stats->TxWords); -+ fc_stats->rx_frames += READ_ONCE(stats->RxFrames); -+ fc_stats->rx_words += READ_ONCE(stats->RxWords); -+ fc_stats->error_frames += READ_ONCE(stats->ErrorFrames); -+ fc_stats->invalid_crc_count += READ_ONCE(stats->InvalidCRCCount); -+ fc_stats->fcp_input_requests += READ_ONCE(stats->InputRequests); -+ fc_stats->fcp_output_requests += READ_ONCE(stats->OutputRequests); -+ fc_stats->fcp_control_requests += READ_ONCE(stats->ControlRequests); -+ fcp_in_bytes += READ_ONCE(stats->InputBytes); -+ fcp_out_bytes += READ_ONCE(stats->OutputBytes); -+ fc_stats->fcp_packet_alloc_failures += READ_ONCE(stats->FcpPktAllocFails); -+ fc_stats->fcp_packet_aborts += READ_ONCE(stats->FcpPktAborts); -+ fc_stats->fcp_frame_alloc_failures += READ_ONCE(stats->FcpFrameAllocFails); -+ fc_stats->link_failure_count += READ_ONCE(stats->LinkFailureCount); - } - fc_stats->fcp_input_megabytes = div_u64(fcp_in_bytes, 1000000); - fc_stats->fcp_output_megabytes = div_u64(fcp_out_bytes, 1000000); ---- a/drivers/scsi/qedf/qedf_main.c 
-+++ b/drivers/scsi/qedf/qedf_main.c -@@ -1067,7 +1067,6 @@ static int qedf_xmit(struct fc_lport *lp - u32 crc; - unsigned int hlen, tlen, elen; - int wlen; -- struct fc_stats *stats; - struct fc_lport *tmp_lport; - struct fc_lport *vn_port = NULL; - struct qedf_rport *fcport; -@@ -1215,10 +1214,8 @@ static int qedf_xmit(struct fc_lport *lp - hp->fcoe_sof = sof; - - /*update tx stats */ -- stats = per_cpu_ptr(lport->stats, get_cpu()); -- stats->TxFrames++; -- stats->TxWords += wlen; -- put_cpu(); -+ this_cpu_inc(lport->stats->TxFrames); -+ this_cpu_add(lport->stats->TxWords, wlen); - - /* Get VLAN ID from skb for printing purposes */ - __vlan_hwaccel_get_tag(skb, &vlan_tci); diff --git a/debian/patches-rt/0002-signal-Replace-__group_send_sig_info-with-send_signa.patch b/debian/patches-rt/0002-signal-Replace-__group_send_sig_info-with-send_signa.patch deleted file mode 100644 index 4f504187f..000000000 --- a/debian/patches-rt/0002-signal-Replace-__group_send_sig_info-with-send_signa.patch +++ /dev/null @@ -1,97 +0,0 @@ -From: "Eric W. Biederman" <ebiederm@xmission.com> -Date: Thu, 5 May 2022 13:26:35 -0500 -Subject: [PATCH 02/12] signal: Replace __group_send_sig_info with - send_signal_locked -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -The function __group_send_sig_info is just a light wrapper around -send_signal_locked with one parameter fixed to a constant value. As -the wrapper adds no real value update the code to directly call the -wrapped function. - -Signed-off-by: "Eric W. 
Biederman" <ebiederm@xmission.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220505182645.497868-2-ebiederm@xmission.com ---- - drivers/tty/tty_jobctrl.c | 4 ++-- - include/linux/signal.h | 1 - - kernel/signal.c | 8 +------- - kernel/time/posix-cpu-timers.c | 6 +++--- - 4 files changed, 6 insertions(+), 13 deletions(-) - ---- a/drivers/tty/tty_jobctrl.c -+++ b/drivers/tty/tty_jobctrl.c -@@ -215,8 +215,8 @@ int tty_signal_session_leader(struct tty - spin_unlock_irq(&p->sighand->siglock); - continue; - } -- __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); -- __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); -+ send_signal_locked(SIGHUP, SEND_SIG_PRIV, p, PIDTYPE_TGID); -+ send_signal_locked(SIGCONT, SEND_SIG_PRIV, p, PIDTYPE_TGID); - put_pid(p->signal->tty_old_pgrp); /* A noop */ - spin_lock(&tty->ctrl.lock); - tty_pgrp = get_pid(tty->ctrl.pgrp); ---- a/include/linux/signal.h -+++ b/include/linux/signal.h -@@ -282,7 +282,6 @@ extern int do_send_sig_info(int sig, str - struct task_struct *p, enum pid_type type); - extern int group_send_sig_info(int sig, struct kernel_siginfo *info, - struct task_struct *p, enum pid_type type); --extern int __group_send_sig_info(int, struct kernel_siginfo *, struct task_struct *); - extern int send_signal_locked(int sig, struct kernel_siginfo *info, - struct task_struct *p, enum pid_type type); - extern int sigprocmask(int, sigset_t *, sigset_t *); ---- a/kernel/signal.c -+++ b/kernel/signal.c -@@ -1281,12 +1281,6 @@ static int __init setup_print_fatal_sign - - __setup("print-fatal-signals=", setup_print_fatal_signals); - --int --__group_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p) --{ -- return send_signal_locked(sig, info, p, PIDTYPE_TGID); --} -- - int do_send_sig_info(int sig, struct kernel_siginfo *info, struct task_struct *p, - enum pid_type type) - { -@@ -2173,7 +2167,7 @@ static void do_notify_parent_cldstop(str - 
spin_lock_irqsave(&sighand->siglock, flags); - if (sighand->action[SIGCHLD-1].sa.sa_handler != SIG_IGN && - !(sighand->action[SIGCHLD-1].sa.sa_flags & SA_NOCLDSTOP)) -- __group_send_sig_info(SIGCHLD, &info, parent); -+ send_signal_locked(SIGCHLD, &info, parent, PIDTYPE_TGID); - /* - * Even if SIGCHLD is not generated, we must wake up wait4 calls. - */ ---- a/kernel/time/posix-cpu-timers.c -+++ b/kernel/time/posix-cpu-timers.c -@@ -870,7 +870,7 @@ static inline void check_dl_overrun(stru - { - if (tsk->dl.dl_overrun) { - tsk->dl.dl_overrun = 0; -- __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); -+ send_signal_locked(SIGXCPU, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); - } - } - -@@ -884,7 +884,7 @@ static bool check_rlimit(u64 time, u64 l - rt ? "RT" : "CPU", hard ? "hard" : "soft", - current->comm, task_pid_nr(current)); - } -- __group_send_sig_info(signo, SEND_SIG_PRIV, current); -+ send_signal_locked(signo, SEND_SIG_PRIV, current, PIDTYPE_TGID); - return true; - } - -@@ -958,7 +958,7 @@ static void check_cpu_itimer(struct task - trace_itimer_expire(signo == SIGPROF ? - ITIMER_PROF : ITIMER_VIRTUAL, - task_tgid(tsk), cur_time); -- __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); -+ send_signal_locked(signo, SEND_SIG_PRIV, tsk, PIDTYPE_TGID); - } - - if (it->expires && it->expires < *expires) diff --git a/debian/patches-rt/0002-smp-Rename-flush_smp_call_function_from_idle.patch b/debian/patches-rt/0002-smp-Rename-flush_smp_call_function_from_idle.patch deleted file mode 100644 index 12c6772e3..000000000 --- a/debian/patches-rt/0002-smp-Rename-flush_smp_call_function_from_idle.patch +++ /dev/null @@ -1,126 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed, 13 Apr 2022 15:31:03 +0200 -Subject: [PATCH 2/3] smp: Rename flush_smp_call_function_from_idle() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -This is invoked from the stopper thread too, which is definitely not idle. 
-Rename it to flush_smp_call_function_queue() and fixup the callers. - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220413133024.305001096@linutronix.de ---- - kernel/sched/core.c | 2 +- - kernel/sched/idle.c | 2 +- - kernel/sched/smp.h | 4 ++-- - kernel/smp.c | 27 ++++++++++++++++++++------- - 4 files changed, 24 insertions(+), 11 deletions(-) - ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -2411,7 +2411,7 @@ static int migration_cpu_stop(void *data - * __migrate_task() such that we will not miss enforcing cpus_ptr - * during wakeups, see set_cpus_allowed_ptr()'s TASK_WAKING test. - */ -- flush_smp_call_function_from_idle(); -+ flush_smp_call_function_queue(); - - raw_spin_lock(&p->pi_lock); - rq_lock(rq, &rf); ---- a/kernel/sched/idle.c -+++ b/kernel/sched/idle.c -@@ -327,7 +327,7 @@ static void do_idle(void) - * RCU relies on this call to be done outside of an RCU read-side - * critical section. 
- */ -- flush_smp_call_function_from_idle(); -+ flush_smp_call_function_queue(); - schedule_idle(); - - if (unlikely(klp_patch_pending(current))) ---- a/kernel/sched/smp.h -+++ b/kernel/sched/smp.h -@@ -9,7 +9,7 @@ extern void sched_ttwu_pending(void *arg - extern void send_call_function_single_ipi(int cpu); - - #ifdef CONFIG_SMP --extern void flush_smp_call_function_from_idle(void); -+extern void flush_smp_call_function_queue(void); - #else --static inline void flush_smp_call_function_from_idle(void) { } -+static inline void flush_smp_call_function_queue(void) { } - #endif ---- a/kernel/smp.c -+++ b/kernel/smp.c -@@ -96,7 +96,7 @@ static DEFINE_PER_CPU_ALIGNED(struct cal - - static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); - --static void flush_smp_call_function_queue(bool warn_cpu_offline); -+static void __flush_smp_call_function_queue(bool warn_cpu_offline); - - int smpcfd_prepare_cpu(unsigned int cpu) - { -@@ -141,7 +141,7 @@ int smpcfd_dying_cpu(unsigned int cpu) - * ensure that the outgoing CPU doesn't go offline with work - * still pending. - */ -- flush_smp_call_function_queue(false); -+ __flush_smp_call_function_queue(false); - irq_work_run(); - return 0; - } -@@ -541,11 +541,11 @@ void generic_smp_call_function_single_in - { - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->gotipi, CFD_SEQ_NOCPU, - smp_processor_id(), CFD_SEQ_GOTIPI); -- flush_smp_call_function_queue(true); -+ __flush_smp_call_function_queue(true); - } - - /** -- * flush_smp_call_function_queue - Flush pending smp-call-function callbacks -+ * __flush_smp_call_function_queue - Flush pending smp-call-function callbacks - * - * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an - * offline CPU. Skip this check if set to 'false'. -@@ -558,7 +558,7 @@ void generic_smp_call_function_single_in - * Loop through the call_single_queue and run all the queued callbacks. - * Must be called with interrupts disabled. 
- */ --static void flush_smp_call_function_queue(bool warn_cpu_offline) -+static void __flush_smp_call_function_queue(bool warn_cpu_offline) - { - call_single_data_t *csd, *csd_next; - struct llist_node *entry, *prev; -@@ -681,7 +681,20 @@ static void flush_smp_call_function_queu - smp_processor_id(), CFD_SEQ_HDLEND); - } - --void flush_smp_call_function_from_idle(void) -+ -+/** -+ * flush_smp_call_function_queue - Flush pending smp-call-function callbacks -+ * from task context (idle, migration thread) -+ * -+ * When TIF_POLLING_NRFLAG is supported and a CPU is in idle and has it -+ * set, then remote CPUs can avoid sending IPIs and wake the idle CPU by -+ * setting TIF_NEED_RESCHED. The idle task on the woken up CPU has to -+ * handle queued SMP function calls before scheduling. -+ * -+ * The migration thread has to ensure that an eventually pending wakeup has -+ * been handled before it migrates a task. -+ */ -+void flush_smp_call_function_queue(void) - { - unsigned long flags; - -@@ -691,7 +704,7 @@ void flush_smp_call_function_from_idle(v - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU, - smp_processor_id(), CFD_SEQ_IDLE); - local_irq_save(flags); -- flush_smp_call_function_queue(true); -+ __flush_smp_call_function_queue(true); - if (local_softirq_pending()) - do_softirq(); - diff --git a/debian/patches-rt/0003-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch b/debian/patches-rt/0003-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch index 36fb863f2..13d4aa7da 100644 --- a/debian/patches-rt/0003-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch +++ b/debian/patches-rt/0003-drm-i915-Use-preempt_disable-enable_rt-where-recomme.patch @@ -2,7 +2,7 @@ From: Mike Galbraith <umgwanakikbuti@gmail.com> Date: Sat, 27 Feb 2016 08:09:11 +0100 Subject: [PATCH 03/10] drm/i915: Use preempt_disable/enable_rt() where recommended -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz Mario Kleiner suggest in commit ad3543ede630f ("drm/intel: Push get_scanout_position() timestamping into kms driver.") diff --git a/debian/patches-rt/0003-fs-dcache-Use-__d_lookup_unhash-in-__d_add-move.patch b/debian/patches-rt/0003-fs-dcache-Use-__d_lookup_unhash-in-__d_add-move.patch new file mode 100644 index 000000000..99da461ef --- /dev/null +++ b/debian/patches-rt/0003-fs-dcache-Use-__d_lookup_unhash-in-__d_add-move.patch @@ -0,0 +1,81 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Sun, 12 Jun 2022 16:27:31 +0200 +Subject: [PATCH 3/4] fs/dcache: Use __d_lookup_unhash() in __d_add/move() +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz + +__d_add() and __d_move() invoke __d_lookup_done() from within a preemption +disabled region. This violates the PREEMPT_RT constraints as the wake up +acquires wait_queue_head::lock which is a "sleeping" spinlock on RT. + +As a preparation for solving this completely, invoke __d_lookup_unhash() +from __d_add/move() and handle the wakeup there. + +This allows to move the spin_lock/unlock(dentry::lock) pair into +__d_lookup_done() which debloats the d_lookup_done() inline. + +No functional change. Moving the wake up out of the preemption disabled +region on RT will be handled in a subsequent change. 
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220613140712.77932-4-bigeasy@linutronix.de +--- + fs/dcache.c | 6 ++++-- + include/linux/dcache.h | 7 ++----- + 2 files changed, 6 insertions(+), 7 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -2737,7 +2737,9 @@ static wait_queue_head_t *__d_lookup_unh + + void __d_lookup_done(struct dentry *dentry) + { ++ spin_lock(&dentry->d_lock); + wake_up_all(__d_lookup_unhash(dentry)); ++ spin_unlock(&dentry->d_lock); + } + EXPORT_SYMBOL(__d_lookup_done); + +@@ -2751,7 +2753,7 @@ static inline void __d_add(struct dentry + if (unlikely(d_in_lookup(dentry))) { + dir = dentry->d_parent->d_inode; + n = start_dir_add(dir); +- __d_lookup_done(dentry); ++ wake_up_all(__d_lookup_unhash(dentry)); + } + if (inode) { + unsigned add_flags = d_flags_for_inode(inode); +@@ -2940,7 +2942,7 @@ static void __d_move(struct dentry *dent + if (unlikely(d_in_lookup(target))) { + dir = target->d_parent->d_inode; + n = start_dir_add(dir); +- __d_lookup_done(target); ++ wake_up_all(__d_lookup_unhash(target)); + } + + write_seqcount_begin(&dentry->d_seq); +--- a/include/linux/dcache.h ++++ b/include/linux/dcache.h +@@ -349,7 +349,7 @@ static inline void dont_mount(struct den + spin_unlock(&dentry->d_lock); + } + +-extern void __d_lookup_done(struct dentry *); ++extern void __d_lookup_done(struct dentry *dentry); + + static inline int d_in_lookup(const struct dentry *dentry) + { +@@ -358,11 +358,8 @@ static inline int d_in_lookup(const stru + + static inline void d_lookup_done(struct dentry *dentry) + { +- if (unlikely(d_in_lookup(dentry))) { +- spin_lock(&dentry->d_lock); ++ if (unlikely(d_in_lookup(dentry))) + __d_lookup_done(dentry); +- spin_unlock(&dentry->d_lock); +- } + } + + extern void dput(struct dentry *); diff --git 
a/debian/patches-rt/0003-scsi-libfc-Remove-get_cpu-semantics-in-fc_exch_em_al.patch b/debian/patches-rt/0003-scsi-libfc-Remove-get_cpu-semantics-in-fc_exch_em_al.patch deleted file mode 100644 index 599c9a0f8..000000000 --- a/debian/patches-rt/0003-scsi-libfc-Remove-get_cpu-semantics-in-fc_exch_em_al.patch +++ /dev/null @@ -1,44 +0,0 @@ -From: Davidlohr Bueso <dave@stgolabs.net> -Date: Fri, 6 May 2022 12:57:57 +0200 -Subject: [PATCH 3/4] scsi: libfc: Remove get_cpu() semantics in - fc_exch_em_alloc() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -The get_cpu() in fc_exch_em_alloc() was introduced in: - - f018b73af6db ([SCSI] libfc, libfcoe, fcoe: use smp_processor_id() only when preempt disabled) - -for no other reason than to simply use smp_processor_id() -without getting a warning, because everything is done with -the pool->lock held anyway. However, get_cpu(), by disabling -preemption, does not play well with PREEMPT_RT, particularly -when acquiring a regular (and thus sleepable) spinlock. - -Therefore remove the get_cpu() and just use the unstable value -as we will have CPU locality guarantees next by taking the lock. -The window of migration, as noted by Sebastian, is small and -even if it happens the result is correct. 
- -Signed-off-by: Davidlohr Bueso <dbueso@suse.de> -Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20211117025956.79616-2-dave@stgolabs.net -Link: https://lore.kernel.org/r/20220506105758.283887-4-bigeasy@linutronix.de ---- - drivers/scsi/libfc/fc_exch.c | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - ---- a/drivers/scsi/libfc/fc_exch.c -+++ b/drivers/scsi/libfc/fc_exch.c -@@ -825,10 +825,9 @@ static struct fc_exch *fc_exch_em_alloc( - } - memset(ep, 0, sizeof(*ep)); - -- cpu = get_cpu(); -+ cpu = raw_smp_processor_id(); - pool = per_cpu_ptr(mp->pool, cpu); - spin_lock_bh(&pool->lock); -- put_cpu(); - - /* peek cache of free slot */ - if (pool->left != FC_XID_UNKNOWN) { diff --git a/debian/patches-rt/0003-smp-Make-softirq-handling-RT-safe-in-flush_smp_call_.patch b/debian/patches-rt/0003-smp-Make-softirq-handling-RT-safe-in-flush_smp_call_.patch deleted file mode 100644 index 845b6a35b..000000000 --- a/debian/patches-rt/0003-smp-Make-softirq-handling-RT-safe-in-flush_smp_call_.patch +++ /dev/null @@ -1,107 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 13 Apr 2022 15:31:05 +0200 -Subject: [PATCH 3/3] smp: Make softirq handling RT safe in - flush_smp_call_function_queue() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -flush_smp_call_function_queue() invokes do_softirq() which is not available -on PREEMPT_RT. flush_smp_call_function_queue() is invoked from the idle -task and the migration task with preemption or interrupts disabled. - -So RT kernels cannot process soft interrupts in that context as that has to -acquire 'sleeping spinlocks' which is not possible with preemption or -interrupts disabled and forbidden from the idle task anyway. 
- -The currently known SMP function call which raises a soft interrupt is in -the block layer, but this functionality is not enabled on RT kernels due to -latency and performance reasons. - -RT could wake up ksoftirqd unconditionally, but this wants to be avoided if -there were soft interrupts pending already when this is invoked in the -context of the migration task. The migration task might have preempted a -threaded interrupt handler which raised a soft interrupt, but did not reach -the local_bh_enable() to process it. The "running" ksoftirqd might prevent -the handling in the interrupt thread context which is causing latency -issues. - -Add a new function which handles this case explicitely for RT and falls -back to do_softirq() on !RT kernels. In the RT case this warns when one of -the flushed SMP function calls raised a soft interrupt so this can be -investigated. - -[ tglx: Moved the RT part out of SMP code ] - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/YgKgL6aPj8aBES6G@linutronix.de -Link: https://lore.kernel.org/r/20220413133024.356509586@linutronix.de ---- - include/linux/interrupt.h | 9 +++++++++ - kernel/smp.c | 5 ++++- - kernel/softirq.c | 13 +++++++++++++ - 3 files changed, 26 insertions(+), 1 deletion(-) - ---- a/include/linux/interrupt.h -+++ b/include/linux/interrupt.h -@@ -607,6 +607,15 @@ struct softirq_action - asmlinkage void do_softirq(void); - asmlinkage void __do_softirq(void); - -+#ifdef CONFIG_PREEMPT_RT -+extern void do_softirq_post_smp_call_flush(unsigned int was_pending); -+#else -+static inline void do_softirq_post_smp_call_flush(unsigned int unused) -+{ -+ do_softirq(); -+} -+#endif -+ - extern void open_softirq(int nr, void (*action)(struct softirq_action *)); - extern void softirq_init(void); - extern void __raise_softirq_irqoff(unsigned int nr); ---- 
a/kernel/smp.c -+++ b/kernel/smp.c -@@ -696,6 +696,7 @@ static void __flush_smp_call_function_qu - */ - void flush_smp_call_function_queue(void) - { -+ unsigned int was_pending; - unsigned long flags; - - if (llist_empty(this_cpu_ptr(&call_single_queue))) -@@ -704,9 +705,11 @@ void flush_smp_call_function_queue(void) - cfd_seq_store(this_cpu_ptr(&cfd_seq_local)->idle, CFD_SEQ_NOCPU, - smp_processor_id(), CFD_SEQ_IDLE); - local_irq_save(flags); -+ /* Get the already pending soft interrupts for RT enabled kernels */ -+ was_pending = local_softirq_pending(); - __flush_smp_call_function_queue(true); - if (local_softirq_pending()) -- do_softirq(); -+ do_softirq_post_smp_call_flush(was_pending); - - local_irq_restore(flags); - } ---- a/kernel/softirq.c -+++ b/kernel/softirq.c -@@ -294,6 +294,19 @@ static inline void invoke_softirq(void) - wakeup_softirqd(); - } - -+/* -+ * flush_smp_call_function_queue() can raise a soft interrupt in a function -+ * call. On RT kernels this is undesired and the only known functionality -+ * in the block layer which does this is disabled on RT. If soft interrupts -+ * get raised which haven't been raised before the flush, warn so it can be -+ * investigated. 
-+ */ -+void do_softirq_post_smp_call_flush(unsigned int was_pending) -+{ -+ if (WARN_ON_ONCE(was_pending != local_softirq_pending())) -+ invoke_softirq(); -+} -+ - #else /* CONFIG_PREEMPT_RT */ - - /* diff --git a/debian/patches-rt/0004-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch b/debian/patches-rt/0004-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch index 622f096ff..bfaffa102 100644 --- a/debian/patches-rt/0004-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch +++ b/debian/patches-rt/0004-drm-i915-Don-t-disable-interrupts-on-PREEMPT_RT-duri.patch @@ -2,7 +2,7 @@ From: Mike Galbraith <umgwanakikbuti@gmail.com> Date: Sat, 27 Feb 2016 09:01:42 +0100 Subject: [PATCH 04/10] drm/i915: Don't disable interrupts on PREEMPT_RT during atomic updates -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz Commit 8d7849db3eab7 ("drm/i915: Make sprite updates atomic") @@ -37,9 +37,9 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/gpu/drm/i915/display/intel_crtc.c +++ b/drivers/gpu/drm/i915/display/intel_crtc.c -@@ -518,7 +518,8 @@ void intel_pipe_update_start(struct inte +@@ -522,7 +522,8 @@ void intel_pipe_update_start(struct inte */ - intel_psr_wait_for_idle(new_crtc_state); + intel_psr_wait_for_idle_locked(new_crtc_state); - local_irq_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) @@ -47,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> crtc->debug.min_vbl = min; crtc->debug.max_vbl = max; -@@ -543,11 +544,13 @@ void intel_pipe_update_start(struct inte +@@ -547,11 +548,13 @@ void intel_pipe_update_start(struct inte break; } @@ -63,7 +63,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } finish_wait(wq, &wait); -@@ -580,7 +583,8 @@ void intel_pipe_update_start(struct inte +@@ -584,7 +587,8 @@ void 
intel_pipe_update_start(struct inte return; irq_disable: @@ -73,7 +73,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_VBLANK_EVADE) -@@ -679,7 +683,8 @@ void intel_pipe_update_end(struct intel_ +@@ -685,7 +689,8 @@ void intel_pipe_update_end(struct intel_ */ intel_vrr_send_push(new_crtc_state); diff --git a/debian/patches-rt/0004-fs-dcache-Move-wakeup-out-of-i_seq_dir-write-held-re.patch b/debian/patches-rt/0004-fs-dcache-Move-wakeup-out-of-i_seq_dir-write-held-re.patch new file mode 100644 index 000000000..79cf26bba --- /dev/null +++ b/debian/patches-rt/0004-fs-dcache-Move-wakeup-out-of-i_seq_dir-write-held-re.patch @@ -0,0 +1,115 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Sun, 12 Jun 2022 16:27:32 +0200 +Subject: [PATCH 4/4] fs/dcache: Move wakeup out of i_seq_dir write held region +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz + +__d_add() and __d_move() wake up waiters on dentry::d_wait from within the +i_seq_dir write held region. This violates the PREEMPT_RT constraints as +the wake up acquires wait_queue_head::lock which is a "sleeping" spinlock +on RT. + +There is no requirement to do so. __d_lookup_unhash() has cleared +DCACHE_PAR_LOOKUP and dentry::d_wait and returned the now unreachable wait +queue head pointer to the caller, so the actual wake up can be postponed +until the i_dir_seq write side critical section is left. The only +requirement is that dentry::lock is held across the whole sequence +including the wake up. + +This is safe because: + + 1) The whole sequence including the wake up is protected by dentry::lock. + + 2) The waitqueue head is allocated by the caller on stack and can't go + away until the whole callchain completes. + + 3) If a queued waiter is woken by a spurious wake up, then it is blocked + on dentry:lock before it can observe DCACHE_PAR_LOOKUP cleared and + return from d_wait_lookup(). 
+ + As the wake up is inside the dentry:lock held region it's guaranteed + that the waiters waitq is dequeued from the waitqueue head before the + waiter returns. + + Moving the wake up past the unlock of dentry::lock would allow the + waiter to return with the on stack waitq still enqueued due to a + spurious wake up. + + 4) New waiters have to acquire dentry::lock before checking whether the + DCACHE_PAR_LOOKUP flag is set. + +Move the wake up past end_dir_add() which leaves the i_dir_seq write side +critical section and enables preemption. + +For non RT kernels there is no difference because preemption is still +disabled due to dentry::lock being held, but it shortens the time between +wake up and unlocking dentry::lock, which reduces the contention for the +woken up waiter. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/20220613140712.77932-5-bigeasy@linutronix.de +--- + fs/dcache.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +--- a/fs/dcache.c ++++ b/fs/dcache.c +@@ -2747,13 +2747,15 @@ EXPORT_SYMBOL(__d_lookup_done); + + static inline void __d_add(struct dentry *dentry, struct inode *inode) + { ++ wait_queue_head_t *d_wait; + struct inode *dir = NULL; + unsigned n; ++ + spin_lock(&dentry->d_lock); + if (unlikely(d_in_lookup(dentry))) { + dir = dentry->d_parent->d_inode; + n = start_dir_add(dir); +- wake_up_all(__d_lookup_unhash(dentry)); ++ d_wait = __d_lookup_unhash(dentry); + } + if (inode) { + unsigned add_flags = d_flags_for_inode(inode); +@@ -2764,8 +2766,10 @@ static inline void __d_add(struct dentry + fsnotify_update_flags(dentry); + } + __d_rehash(dentry); +- if (dir) ++ if (dir) { + end_dir_add(dir, n); ++ wake_up_all(d_wait); ++ } + spin_unlock(&dentry->d_lock); + if (inode) + spin_unlock(&inode->i_lock); +@@ -2912,6 +2916,7 @@ static void 
__d_move(struct dentry *dent + bool exchange) + { + struct dentry *old_parent, *p; ++ wait_queue_head_t *d_wait; + struct inode *dir = NULL; + unsigned n; + +@@ -2942,7 +2947,7 @@ static void __d_move(struct dentry *dent + if (unlikely(d_in_lookup(target))) { + dir = target->d_parent->d_inode; + n = start_dir_add(dir); +- wake_up_all(__d_lookup_unhash(target)); ++ d_wait = __d_lookup_unhash(target); + } + + write_seqcount_begin(&dentry->d_seq); +@@ -2977,8 +2982,10 @@ static void __d_move(struct dentry *dent + write_seqcount_end(&target->d_seq); + write_seqcount_end(&dentry->d_seq); + +- if (dir) ++ if (dir) { + end_dir_add(dir, n); ++ wake_up_all(d_wait); ++ } + + if (dentry->d_parent != old_parent) + spin_unlock(&dentry->d_parent->d_lock); diff --git a/debian/patches-rt/0004-printk-wake-up-all-waiters.patch b/debian/patches-rt/0004-printk-wake-up-all-waiters.patch deleted file mode 100644 index 5c6f5f4c2..000000000 --- a/debian/patches-rt/0004-printk-wake-up-all-waiters.patch +++ /dev/null @@ -1,28 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 20 Apr 2022 01:52:26 +0206 -Subject: [PATCH 04/18] printk: wake up all waiters -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -There can be multiple tasks waiting for new records. They should -all be woken. Use wake_up_interruptible_all() instead of -wake_up_interruptible(). 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220419234637.357112-5-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -3326,7 +3326,7 @@ static void wake_up_klogd_work_func(stru - } - - if (pending & PRINTK_PENDING_WAKEUP) -- wake_up_interruptible(&log_wait); -+ wake_up_interruptible_all(&log_wait); - } - - static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = diff --git a/debian/patches-rt/0004-scsi-bnx2fc-Avoid-using-get_cpu-in-bnx2fc_cmd_alloc.patch b/debian/patches-rt/0004-scsi-bnx2fc-Avoid-using-get_cpu-in-bnx2fc_cmd_alloc.patch deleted file mode 100644 index 2e0eed851..000000000 --- a/debian/patches-rt/0004-scsi-bnx2fc-Avoid-using-get_cpu-in-bnx2fc_cmd_alloc.patch +++ /dev/null @@ -1,56 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 6 May 2022 12:57:58 +0200 -Subject: [PATCH 4/4] scsi: bnx2fc: Avoid using get_cpu() in - bnx2fc_cmd_alloc(). -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Using get_cpu() leads to disabling preemption and in this context it is -not possible to acquire the following spinlock_t on PREEMPT_RT because -it becomes a sleeping lock. - -Commit - 0ea5c27583e1c ("[SCSI] bnx2fc: common free list for cleanup commands") - -says that it is using get_cpu() as a fix in case the CPU is preempted. -While this might be true, the important part is that it is now using the -same CPU for locking and unlocking while previously it always relied on -smp_processor_id(). -The date structure itself is protected with a lock so it does not rely -on CPU-local access. 
- -Replace get_cpu() with raw_smp_processor_id() to obtain the current CPU -number which is used as an index for the per-CPU resource. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220506105758.283887-5-bigeasy@linutronix.de ---- - drivers/scsi/bnx2fc/bnx2fc_io.c | 4 +--- - 1 file changed, 1 insertion(+), 3 deletions(-) - ---- a/drivers/scsi/bnx2fc/bnx2fc_io.c -+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c -@@ -472,7 +472,7 @@ struct bnx2fc_cmd *bnx2fc_cmd_alloc(stru - u32 free_sqes; - u32 max_sqes; - u16 xid; -- int index = get_cpu(); -+ int index = raw_smp_processor_id(); - - max_sqes = BNX2FC_SCSI_MAX_SQES; - /* -@@ -485,7 +485,6 @@ struct bnx2fc_cmd *bnx2fc_cmd_alloc(stru - (tgt->num_active_ios.counter >= max_sqes) || - (free_sqes + max_sqes <= BNX2FC_SQ_WQES_MAX)) { - spin_unlock_bh(&cmd_mgr->free_list_lock[index]); -- put_cpu(); - return NULL; - } - -@@ -498,7 +497,6 @@ struct bnx2fc_cmd *bnx2fc_cmd_alloc(stru - atomic_inc(&tgt->num_active_ios); - atomic_dec(&tgt->free_sqes); - spin_unlock_bh(&cmd_mgr->free_list_lock[index]); -- put_cpu(); - - INIT_LIST_HEAD(&io_req->link); - diff --git a/debian/patches-rt/0005-drm-i915-Don-t-check-for-atomic-context-on-PREEMPT_R.patch b/debian/patches-rt/0005-drm-i915-Don-t-check-for-atomic-context-on-PREEMPT_R.patch index 18e1d3661..90b943718 100644 --- a/debian/patches-rt/0005-drm-i915-Don-t-check-for-atomic-context-on-PREEMPT_R.patch +++ b/debian/patches-rt/0005-drm-i915-Don-t-check-for-atomic-context-on-PREEMPT_R.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Mon, 25 Oct 2021 15:05:18 +0200 Subject: [PATCH 05/10] drm/i915: Don't check for atomic context on PREEMPT_RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz The !in_atomic() check in _wait_for_atomic() triggers on PREEMPT_RT because 
the uncore::lock is a spinlock_t and does not disable @@ -19,7 +19,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h -@@ -329,7 +329,7 @@ wait_remaining_ms_from_jiffies(unsigned +@@ -334,7 +334,7 @@ wait_remaining_ms_from_jiffies(unsigned #define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000) /* If CONFIG_PREEMPT_COUNT is disabled, in_atomic() always reports false. */ diff --git a/debian/patches-rt/0005-ptrace-Remove-arch_ptrace_attach.patch b/debian/patches-rt/0005-ptrace-Remove-arch_ptrace_attach.patch deleted file mode 100644 index 5961cc7ee..000000000 --- a/debian/patches-rt/0005-ptrace-Remove-arch_ptrace_attach.patch +++ /dev/null @@ -1,158 +0,0 @@ -From: "Eric W. Biederman" <ebiederm@xmission.com> -Date: Thu, 5 May 2022 13:26:38 -0500 -Subject: [PATCH 05/12] ptrace: Remove arch_ptrace_attach -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -The last remaining implementation of arch_ptrace_attach is ia64's -ptrace_attach_sync_user_rbs which was added at the end of 2007 in -commit aa91a2e90044 ("[IA64] Synchronize RBS on PTRACE_ATTACH"). - -Reading the comments and examining the code ptrace_attach_sync_user_rbs -has the sole purpose of saving registers to the stack when ptrace_attach -changes TASK_STOPPED to TASK_TRACED. In all other cases arch_ptrace_stop -takes care of the register saving. - -In commit d79fdd6d96f4 ("ptrace: Clean transitions between TASK_STOPPED and TRACED") -modified ptrace_attach to wake up the thread and enter ptrace_stop normally even -when the thread starts out stopped. - -This makes ptrace_attach_sync_user_rbs completely unnecessary. So just -remove it. - -Cc: linux-ia64@vger.kernel.org -Signed-off-by: "Eric W. 
Biederman" <ebiederm@xmission.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220505182645.497868-5-ebiederm@xmission.com ---- - arch/ia64/include/asm/ptrace.h | 4 -- - arch/ia64/kernel/ptrace.c | 57 ----------------------------------------- - kernel/ptrace.c | 18 ------------ - 3 files changed, 79 deletions(-) - ---- a/arch/ia64/include/asm/ptrace.h -+++ b/arch/ia64/include/asm/ptrace.h -@@ -139,10 +139,6 @@ static inline long regs_return_value(str - #define arch_ptrace_stop_needed() \ - (!test_thread_flag(TIF_RESTORE_RSE)) - -- extern void ptrace_attach_sync_user_rbs (struct task_struct *); -- #define arch_ptrace_attach(child) \ -- ptrace_attach_sync_user_rbs(child) -- - #define arch_has_single_step() (1) - #define arch_has_block_step() (1) - ---- a/arch/ia64/kernel/ptrace.c -+++ b/arch/ia64/kernel/ptrace.c -@@ -618,63 +618,6 @@ void ia64_sync_krbs(void) - } - - /* -- * After PTRACE_ATTACH, a thread's register backing store area in user -- * space is assumed to contain correct data whenever the thread is -- * stopped. arch_ptrace_stop takes care of this on tracing stops. -- * But if the child was already stopped for job control when we attach -- * to it, then it might not ever get into ptrace_stop by the time we -- * want to examine the user memory containing the RBS. -- */ --void --ptrace_attach_sync_user_rbs (struct task_struct *child) --{ -- int stopped = 0; -- struct unw_frame_info info; -- -- /* -- * If the child is in TASK_STOPPED, we need to change that to -- * TASK_TRACED momentarily while we operate on it. This ensures -- * that the child won't be woken up and return to user mode while -- * we are doing the sync. (It can only be woken up for SIGKILL.) 
-- */ -- -- read_lock(&tasklist_lock); -- if (child->sighand) { -- spin_lock_irq(&child->sighand->siglock); -- if (READ_ONCE(child->__state) == TASK_STOPPED && -- !test_and_set_tsk_thread_flag(child, TIF_RESTORE_RSE)) { -- set_notify_resume(child); -- -- WRITE_ONCE(child->__state, TASK_TRACED); -- stopped = 1; -- } -- spin_unlock_irq(&child->sighand->siglock); -- } -- read_unlock(&tasklist_lock); -- -- if (!stopped) -- return; -- -- unw_init_from_blocked_task(&info, child); -- do_sync_rbs(&info, ia64_sync_user_rbs); -- -- /* -- * Now move the child back into TASK_STOPPED if it should be in a -- * job control stop, so that SIGCONT can be used to wake it up. -- */ -- read_lock(&tasklist_lock); -- if (child->sighand) { -- spin_lock_irq(&child->sighand->siglock); -- if (READ_ONCE(child->__state) == TASK_TRACED && -- (child->signal->flags & SIGNAL_STOP_STOPPED)) { -- WRITE_ONCE(child->__state, TASK_STOPPED); -- } -- spin_unlock_irq(&child->sighand->siglock); -- } -- read_unlock(&tasklist_lock); --} -- --/* - * Write f32-f127 back to task->thread.fph if it has been modified. - */ - inline void ---- a/kernel/ptrace.c -+++ b/kernel/ptrace.c -@@ -1285,10 +1285,6 @@ int ptrace_request(struct task_struct *c - return ret; - } - --#ifndef arch_ptrace_attach --#define arch_ptrace_attach(child) do { } while (0) --#endif -- - SYSCALL_DEFINE4(ptrace, long, request, long, pid, unsigned long, addr, - unsigned long, data) - { -@@ -1297,8 +1293,6 @@ SYSCALL_DEFINE4(ptrace, long, request, l - - if (request == PTRACE_TRACEME) { - ret = ptrace_traceme(); -- if (!ret) -- arch_ptrace_attach(current); - goto out; - } - -@@ -1310,12 +1304,6 @@ SYSCALL_DEFINE4(ptrace, long, request, l - - if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { - ret = ptrace_attach(child, request, addr, data); -- /* -- * Some architectures need to do book-keeping after -- * a ptrace attach. 
-- */ -- if (!ret) -- arch_ptrace_attach(child); - goto out_put_task_struct; - } - -@@ -1455,12 +1443,6 @@ COMPAT_SYSCALL_DEFINE4(ptrace, compat_lo - - if (request == PTRACE_ATTACH || request == PTRACE_SEIZE) { - ret = ptrace_attach(child, request, addr, data); -- /* -- * Some architectures need to do book-keeping after -- * a ptrace attach. -- */ -- if (!ret) -- arch_ptrace_attach(child); - goto out_put_task_struct; - } - diff --git a/debian/patches-rt/0006-drm-i915-Disable-tracing-points-on-PREEMPT_RT.patch b/debian/patches-rt/0006-drm-i915-Disable-tracing-points-on-PREEMPT_RT.patch index e54d1e790..236f6967b 100644 --- a/debian/patches-rt/0006-drm-i915-Disable-tracing-points-on-PREEMPT_RT.patch +++ b/debian/patches-rt/0006-drm-i915-Disable-tracing-points-on-PREEMPT_RT.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Thu, 6 Dec 2018 09:52:20 +0100 Subject: [PATCH 06/10] drm/i915: Disable tracing points on PREEMPT_RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz Luca Abeni reported this: | BUG: scheduling while atomic: kworker/u8:2/15203/0x00000003 diff --git a/debian/patches-rt/0006-printk-get-caller_id-timestamp-after-migration-disab.patch b/debian/patches-rt/0006-printk-get-caller_id-timestamp-after-migration-disab.patch deleted file mode 100644 index 95aed34b8..000000000 --- a/debian/patches-rt/0006-printk-get-caller_id-timestamp-after-migration-disab.patch +++ /dev/null @@ -1,67 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 20 Apr 2022 01:52:28 +0206 -Subject: [PATCH 06/18] printk: get caller_id/timestamp after migration disable -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Currently the local CPU timestamp and caller_id for the record are -collected while migration is enabled. 
Since this information is -CPU-specific, it should be collected with migration disabled. - -Migration is disabled immediately after collecting this information -anyway, so just move the information collection to after the -migration disabling. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220419234637.357112-7-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 10 ++++++---- - 1 file changed, 6 insertions(+), 4 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -2063,7 +2063,7 @@ static inline void printk_delay(void) - static inline u32 printk_caller_id(void) - { - return in_task() ? task_pid_nr(current) : -- 0x80000000 + raw_smp_processor_id(); -+ 0x80000000 + smp_processor_id(); - } - - /** -@@ -2145,7 +2145,6 @@ int vprintk_store(int facility, int leve - const struct dev_printk_info *dev_info, - const char *fmt, va_list args) - { -- const u32 caller_id = printk_caller_id(); - struct prb_reserved_entry e; - enum printk_info_flags flags = 0; - struct printk_record r; -@@ -2155,10 +2154,14 @@ int vprintk_store(int facility, int leve - u8 *recursion_ptr; - u16 reserve_size; - va_list args2; -+ u32 caller_id; - u16 text_len; - int ret = 0; - u64 ts_nsec; - -+ if (!printk_enter_irqsave(recursion_ptr, irqflags)) -+ return 0; -+ - /* - * Since the duration of printk() can vary depending on the message - * and state of the ringbuffer, grab the timestamp now so that it is -@@ -2167,8 +2170,7 @@ int vprintk_store(int facility, int leve - */ - ts_nsec = local_clock(); - -- if (!printk_enter_irqsave(recursion_ptr, irqflags)) -- return 0; -+ caller_id = printk_caller_id(); - - /* - * The sprintf needs to come first since the syslog prefix might be diff --git 
a/debian/patches-rt/0006-signal-Use-lockdep_assert_held-instead-of-assert_spi.patch b/debian/patches-rt/0006-signal-Use-lockdep_assert_held-instead-of-assert_spi.patch deleted file mode 100644 index cc56c6697..000000000 --- a/debian/patches-rt/0006-signal-Use-lockdep_assert_held-instead-of-assert_spi.patch +++ /dev/null @@ -1,40 +0,0 @@ -From: "Eric W. Biederman" <ebiederm@xmission.com> -Date: Thu, 5 May 2022 13:26:39 -0500 -Subject: [PATCH 06/12] signal: Use lockdep_assert_held instead of - assert_spin_locked -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -The distinction is that assert_spin_locked() checks if the lock is -held *by*anyone* whereas lockdep_assert_held() asserts the current -context holds the lock. Also, the check goes away if you build -without lockdep. - -Suggested-by: Peter Zijlstra <peterz@infradead.org> -Link: https://lkml.kernel.org/r/Ympr/+PX4XgT/UKU@hirez.programming.kicks-ass.net -Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220505182645.497868-6-ebiederm@xmission.com ---- - kernel/signal.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - ---- a/kernel/signal.c -+++ b/kernel/signal.c -@@ -884,7 +884,7 @@ static int check_kill_permission(int sig - static void ptrace_trap_notify(struct task_struct *t) - { - WARN_ON_ONCE(!(t->ptrace & PT_SEIZED)); -- assert_spin_locked(&t->sighand->siglock); -+ lockdep_assert_held(&t->sighand->siglock); - - task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY); - ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING); -@@ -1079,7 +1079,7 @@ static int __send_signal_locked(int sig, - int override_rlimit; - int ret = 0, result; - -- assert_spin_locked(&t->sighand->siglock); -+ lockdep_assert_held(&t->sighand->siglock); - - result = TRACE_SIGNAL_IGNORED; - if (!prepare_signal(sig, t, force)) diff --git 
a/debian/patches-rt/0007-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch b/debian/patches-rt/0007-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch index ca185b4c8..3a3ba7236 100644 --- a/debian/patches-rt/0007-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch +++ b/debian/patches-rt/0007-drm-i915-skip-DRM_I915_LOW_LEVEL_TRACEPOINTS-with-NO.patch @@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Wed, 19 Dec 2018 10:47:02 +0100 Subject: [PATCH 07/10] drm/i915: skip DRM_I915_LOW_LEVEL_TRACEPOINTS with NOTRACE -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz The order of the header files is important. If this header file is included after tracepoint.h was included then the NOTRACE here becomes a diff --git a/debian/patches-rt/0007-printk-call-boot_delay_msec-in-printk_delay.patch b/debian/patches-rt/0007-printk-call-boot_delay_msec-in-printk_delay.patch deleted file mode 100644 index 998764af5..000000000 --- a/debian/patches-rt/0007-printk-call-boot_delay_msec-in-printk_delay.patch +++ /dev/null @@ -1,42 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 20 Apr 2022 01:52:29 +0206 -Subject: [PATCH 07/18] printk: call boot_delay_msec() in printk_delay() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -boot_delay_msec() is always called immediately before printk_delay() -so just call it from within printk_delay(). 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220419234637.357112-8-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 7 ++++--- - 1 file changed, 4 insertions(+), 3 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -2048,8 +2048,10 @@ static u8 *__printk_recursion_counter(vo - - int printk_delay_msec __read_mostly; - --static inline void printk_delay(void) -+static inline void printk_delay(int level) - { -+ boot_delay_msec(level); -+ - if (unlikely(printk_delay_msec)) { - int m = printk_delay_msec; - -@@ -2274,8 +2276,7 @@ asmlinkage int vprintk_emit(int facility - in_sched = true; - } - -- boot_delay_msec(level); -- printk_delay(); -+ printk_delay(level); - - printed_len = vprintk_store(facility, level, dev_info, fmt, args); - diff --git a/debian/patches-rt/0008-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch b/debian/patches-rt/0008-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch index 62d30e591..b11b2d1cf 100644 --- a/debian/patches-rt/0008-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch +++ b/debian/patches-rt/0008-drm-i915-gt-Queue-and-wait-for-the-irq_work-item.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Wed, 8 Sep 2021 17:18:00 +0200 Subject: [PATCH 08/10] drm/i915/gt: Queue and wait for the irq_work item. -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz Disabling interrupts and invoking the irq_work function directly breaks on PREEMPT_RT. 
@@ -27,7 +27,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c -@@ -311,10 +311,9 @@ void __intel_breadcrumbs_park(struct int +@@ -312,10 +312,9 @@ void __intel_breadcrumbs_park(struct int /* Kick the work once more to drain the signalers, and disarm the irq */ irq_work_sync(&b->irq_work); while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { diff --git a/debian/patches-rt/0008-printk-add-con_printk-macro-for-console-details.patch b/debian/patches-rt/0008-printk-add-con_printk-macro-for-console-details.patch deleted file mode 100644 index 0d90f1225..000000000 --- a/debian/patches-rt/0008-printk-add-con_printk-macro-for-console-details.patch +++ /dev/null @@ -1,56 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 20 Apr 2022 01:52:30 +0206 -Subject: [PATCH 08/18] printk: add con_printk() macro for console details -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -It is useful to generate log messages that include details about -the related console. Rather than duplicate the code to assemble -the details, put that code into a macro con_printk(). - -Once console printers become threaded, this macro will find more -users. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220419234637.357112-9-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 13 +++++++------ - 1 file changed, 7 insertions(+), 6 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -3015,6 +3015,11 @@ static void try_enable_default_console(s - newcon->flags |= CON_CONSDEV; - } - -+#define con_printk(lvl, con, fmt, ...) \ -+ printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \ -+ (con->flags & CON_BOOT) ? 
"boot" : "", \ -+ con->name, con->index, ##__VA_ARGS__) -+ - /* - * The console driver calls this routine during kernel initialization - * to register the console printing procedure with printk() and to -@@ -3153,9 +3158,7 @@ void register_console(struct console *ne - * users know there might be something in the kernel's log buffer that - * went to the bootconsole (that they do not see on the real console) - */ -- pr_info("%sconsole [%s%d] enabled\n", -- (newcon->flags & CON_BOOT) ? "boot" : "" , -- newcon->name, newcon->index); -+ con_printk(KERN_INFO, newcon, "enabled\n"); - if (bootcon_enabled && - ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) && - !keep_bootcon) { -@@ -3174,9 +3177,7 @@ int unregister_console(struct console *c - struct console *con; - int res; - -- pr_info("%sconsole [%s%d] disabled\n", -- (console->flags & CON_BOOT) ? "boot" : "" , -- console->name, console->index); -+ con_printk(KERN_INFO, console, "disabled\n"); - - res = _braille_unregister_console(console); - if (res < 0) diff --git a/debian/patches-rt/0008-ptrace-Document-that-wait_task_inactive-can-t-fail.patch b/debian/patches-rt/0008-ptrace-Document-that-wait_task_inactive-can-t-fail.patch deleted file mode 100644 index 205a7a92e..000000000 --- a/debian/patches-rt/0008-ptrace-Document-that-wait_task_inactive-can-t-fail.patch +++ /dev/null @@ -1,66 +0,0 @@ -From: "Eric W. Biederman" <ebiederm@xmission.com> -Date: Thu, 5 May 2022 13:26:41 -0500 -Subject: [PATCH 08/12] ptrace: Document that wait_task_inactive can't fail -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -After ptrace_freeze_traced succeeds it is known that the the tracee -has a __state value of __TASK_TRACED and that no __ptrace_unlink will -happen because the tracer is waiting for the tracee, and the tracee is -in ptrace_stop. - -The function ptrace_freeze_traced can succeed at any point after -ptrace_stop has set TASK_TRACED and dropped siglock. 
The read_lock on -tasklist_lock only excludes ptrace_attach. - -This means that the !current->ptrace which executes under a read_lock -of tasklist_lock will never see a ptrace_freeze_trace as the tracer -must have gone away before the tasklist_lock was taken and -ptrace_attach can not occur until the read_lock is dropped. As -ptrace_freeze_traced depends upon ptrace_attach running before it can -run that excludes ptrace_freeze_traced until __state is set to -TASK_RUNNING. This means that task_is_traced will fail in -ptrace_freeze_attach and ptrace_freeze_attached will fail. - -On the current->ptrace branch of ptrace_stop which will be reached any -time after ptrace_freeze_traced has succeed it is known that __state -is __TASK_TRACED and schedule() will be called with that state. - -Use a WARN_ON_ONCE to document that wait_task_inactive(TASK_TRACED) -should never fail. Remove the stale comment about may_ptrace_stop. - -Strictly speaking this is not true because if PREEMPT_RT is enabled -wait_task_inactive can fail because __state can be changed. I don't -see this as a problem as the ptrace code is currently broken on -PREMPT_RT, and this is one of the issues. Failing and warning when -the assumptions of the code are broken is good. - -Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220505182645.497868-8-ebiederm@xmission.com ---- - kernel/ptrace.c | 14 +++----------- - 1 file changed, 3 insertions(+), 11 deletions(-) - ---- a/kernel/ptrace.c -+++ b/kernel/ptrace.c -@@ -266,17 +266,9 @@ static int ptrace_check_attach(struct ta - } - read_unlock(&tasklist_lock); - -- if (!ret && !ignore_state) { -- if (!wait_task_inactive(child, __TASK_TRACED)) { -- /* -- * This can only happen if may_ptrace_stop() fails and -- * ptrace_stop() changes ->state back to TASK_RUNNING, -- * so we should not worry about leaking __TASK_TRACED. 
-- */ -- WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); -- ret = -ESRCH; -- } -- } -+ if (!ret && !ignore_state && -+ WARN_ON_ONCE(!wait_task_inactive(child, __TASK_TRACED))) -+ ret = -ESRCH; - - return ret; - } diff --git a/debian/patches-rt/0009-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch b/debian/patches-rt/0009-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch index d484abbf7..3390be54f 100644 --- a/debian/patches-rt/0009-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch +++ b/debian/patches-rt/0009-drm-i915-gt-Use-spin_lock_irq-instead-of-local_irq_d.patch @@ -2,7 +2,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Wed, 8 Sep 2021 19:03:41 +0200 Subject: [PATCH 09/10] drm/i915/gt: Use spin_lock_irq() instead of local_irq_disable() + spin_lock() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz execlists_dequeue() is invoked from a function which uses local_irq_disable() to disable interrupts so the spin_lock() behaves @@ -28,7 +28,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c -@@ -1290,7 +1290,7 @@ static void execlists_dequeue(struct int +@@ -1289,7 +1289,7 @@ static void execlists_dequeue(struct int * and context switches) submission. */ @@ -37,7 +37,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> /* * If the queue is higher priority than the last -@@ -1390,7 +1390,7 @@ static void execlists_dequeue(struct int +@@ -1389,7 +1389,7 @@ static void execlists_dequeue(struct int * Even if ELSP[1] is occupied and not worthy * of timeslices, our queue might be. 
*/ @@ -46,7 +46,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> return; } } -@@ -1416,7 +1416,7 @@ static void execlists_dequeue(struct int +@@ -1415,7 +1415,7 @@ static void execlists_dequeue(struct int if (last && !can_merge_rq(last, rq)) { spin_unlock(&ve->base.sched_engine->lock); @@ -55,7 +55,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> return; /* leave this for another sibling */ } -@@ -1578,7 +1578,7 @@ static void execlists_dequeue(struct int +@@ -1577,7 +1577,7 @@ static void execlists_dequeue(struct int */ sched_engine->queue_priority_hint = queue_prio(sched_engine); i915_sched_engine_reset_on_empty(sched_engine); @@ -64,7 +64,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> /* * We can skip poking the HW if we ended up with exactly the same set -@@ -1604,13 +1604,6 @@ static void execlists_dequeue(struct int +@@ -1603,13 +1603,6 @@ static void execlists_dequeue(struct int } } @@ -78,7 +78,7 @@ Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> static void clear_ports(struct i915_request **ports, int count) { memset_p((void **)ports, NULL, count); -@@ -2431,7 +2424,7 @@ static void execlists_submission_tasklet +@@ -2439,7 +2432,7 @@ static void execlists_submission_tasklet } if (!engine->execlists.pending[0]) { diff --git a/debian/patches-rt/0009-printk-refactor-and-rework-printing-logic.patch b/debian/patches-rt/0009-printk-refactor-and-rework-printing-logic.patch deleted file mode 100644 index 3f8a2309a..000000000 --- a/debian/patches-rt/0009-printk-refactor-and-rework-printing-logic.patch +++ /dev/null @@ -1,618 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 20 Apr 2022 01:52:31 +0206 -Subject: [PATCH 09/18] printk: refactor and rework printing logic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Refactor/rework printing logic in order to prepare for moving to -threaded console printing. 
- -- Move @console_seq into struct console so that the current - "position" of each console can be tracked individually. - -- Move @console_dropped into struct console so that the current drop - count of each console can be tracked individually. - -- Modify printing logic so that each console independently loads, - prepares, and prints its next record. - -- Remove exclusive_console logic. Since console positions are - handled independently, replaying past records occurs naturally. - -- Update the comments explaining why preemption is disabled while - printing from printk() context. - -With these changes, there is a change in behavior: the console -replaying the log (formerly exclusive console) will no longer block -other consoles. New messages appear on the other consoles while the -newly added console is still replaying. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220419234637.357112-10-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/console.h | 2 - kernel/printk/printk.c | 441 ++++++++++++++++++++++++------------------------ - 2 files changed, 230 insertions(+), 213 deletions(-) - ---- a/include/linux/console.h -+++ b/include/linux/console.h -@@ -151,6 +151,8 @@ struct console { - int cflag; - uint ispeed; - uint ospeed; -+ u64 seq; -+ unsigned long dropped; - void *data; - struct console *next; - }; ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -281,11 +281,6 @@ static bool panic_in_progress(void) - static int console_locked, console_suspended; - - /* -- * If exclusive_console is non-NULL then only this console is to be printed to. 
-- */ --static struct console *exclusive_console; -- --/* - * Array of consoles built from command line options (console=) - */ - -@@ -374,12 +369,6 @@ static u64 syslog_seq; - static size_t syslog_partial; - static bool syslog_time; - --/* All 3 protected by @console_sem. */ --/* the next printk record to write to the console */ --static u64 console_seq; --static u64 exclusive_console_stop_seq; --static unsigned long console_dropped; -- - struct latched_seq { - seqcount_latch_t latch; - u64 val[2]; -@@ -1933,47 +1922,26 @@ static int console_trylock_spinning(void - } - - /* -- * Call the console drivers, asking them to write out -- * log_buf[start] to log_buf[end - 1]. -- * The console_lock must be held. -+ * Call the specified console driver, asking it to write out the specified -+ * text and length. For non-extended consoles, if any records have been -+ * dropped, a dropped message will be written out first. - */ --static void call_console_drivers(const char *ext_text, size_t ext_len, -- const char *text, size_t len) -+static void call_console_driver(struct console *con, const char *text, size_t len) - { - static char dropped_text[64]; -- size_t dropped_len = 0; -- struct console *con; -+ size_t dropped_len; - - trace_console_rcuidle(text, len); - -- if (!console_drivers) -- return; -- -- if (console_dropped) { -+ if (con->dropped && !(con->flags & CON_EXTENDED)) { - dropped_len = snprintf(dropped_text, sizeof(dropped_text), - "** %lu printk messages dropped **\n", -- console_dropped); -- console_dropped = 0; -+ con->dropped); -+ con->dropped = 0; -+ con->write(con, dropped_text, dropped_len); - } - -- for_each_console(con) { -- if (exclusive_console && con != exclusive_console) -- continue; -- if (!(con->flags & CON_ENABLED)) -- continue; -- if (!con->write) -- continue; -- if (!cpu_online(smp_processor_id()) && -- !(con->flags & CON_ANYTIME)) -- continue; -- if (con->flags & CON_EXTENDED) -- con->write(con, ext_text, ext_len); -- else { -- if (dropped_len) -- 
con->write(con, dropped_text, dropped_len); -- con->write(con, text, len); -- } -- } -+ con->write(con, text, len); - } - - /* -@@ -2283,15 +2251,18 @@ asmlinkage int vprintk_emit(int facility - /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { - /* -- * Disable preemption to avoid being preempted while holding -- * console_sem which would prevent anyone from printing to -- * console -+ * The caller may be holding system-critical or -+ * timing-sensitive locks. Disable preemption during -+ * printing of all remaining records to all consoles so that -+ * this context can return as soon as possible. Hopefully -+ * another printk() caller will take over the printing. - */ - preempt_disable(); - /* - * Try to acquire and then immediately release the console -- * semaphore. The release will print out buffers and wake up -- * /dev/kmsg and syslog() users. -+ * semaphore. The release will print out buffers. With the -+ * spinning variant, this context tries to take over the -+ * printing from another printing context. 
- */ - if (console_trylock_spinning()) - console_unlock(); -@@ -2329,11 +2300,9 @@ EXPORT_SYMBOL(_printk); - - #define prb_read_valid(rb, seq, r) false - #define prb_first_valid_seq(rb) 0 -+#define prb_next_seq(rb) 0 - - static u64 syslog_seq; --static u64 console_seq; --static u64 exclusive_console_stop_seq; --static unsigned long console_dropped; - - static size_t record_print_text(const struct printk_record *r, - bool syslog, bool time) -@@ -2350,8 +2319,7 @@ static ssize_t msg_print_ext_body(char * - struct dev_printk_info *dev_info) { return 0; } - static void console_lock_spinning_enable(void) { } - static int console_lock_spinning_disable_and_check(void) { return 0; } --static void call_console_drivers(const char *ext_text, size_t ext_len, -- const char *text, size_t len) {} -+static void call_console_driver(struct console *con, const char *text, size_t len) { } - static bool suppress_message_printing(int level) { return false; } - - #endif /* CONFIG_PRINTK */ -@@ -2622,22 +2590,6 @@ int is_console_locked(void) - EXPORT_SYMBOL(is_console_locked); - - /* -- * Check if we have any console that is capable of printing while cpu is -- * booting or shutting down. Requires console_sem. -- */ --static int have_callable_console(void) --{ -- struct console *con; -- -- for_each_console(con) -- if ((con->flags & CON_ENABLED) && -- (con->flags & CON_ANYTIME)) -- return 1; -- -- return 0; --} -- --/* - * Return true when this CPU should unlock console_sem without pushing all - * messages to the console. This reduces the chance that the console is - * locked when the panic CPU tries to use it. -@@ -2657,15 +2609,182 @@ static bool abandon_console_lock_in_pani - } - - /* -- * Can we actually use the console at this time on this cpu? -+ * Check if the given console is currently capable and allowed to print -+ * records. -+ * -+ * Requires the console_lock. 
-+ */ -+static inline bool console_is_usable(struct console *con) -+{ -+ if (!(con->flags & CON_ENABLED)) -+ return false; -+ -+ if (!con->write) -+ return false; -+ -+ /* -+ * Console drivers may assume that per-cpu resources have been -+ * allocated. So unless they're explicitly marked as being able to -+ * cope (CON_ANYTIME) don't call them until this CPU is officially up. -+ */ -+ if (!cpu_online(raw_smp_processor_id()) && -+ !(con->flags & CON_ANYTIME)) -+ return false; -+ -+ return true; -+} -+ -+static void __console_unlock(void) -+{ -+ console_locked = 0; -+ up_console_sem(); -+} -+ -+/* -+ * Print one record for the given console. The record printed is whatever -+ * record is the next available record for the given console. -+ * -+ * @handover will be set to true if a printk waiter has taken over the -+ * console_lock, in which case the caller is no longer holding the -+ * console_lock. Otherwise it is set to false. -+ * -+ * Returns false if the given console has no next record to print, otherwise -+ * true. -+ * -+ * Requires the console_lock. -+ */ -+static bool console_emit_next_record(struct console *con, bool *handover) -+{ -+ static char ext_text[CONSOLE_EXT_LOG_MAX]; -+ static char text[CONSOLE_LOG_MAX]; -+ static int panic_console_dropped; -+ struct printk_info info; -+ struct printk_record r; -+ unsigned long flags; -+ char *write_text; -+ size_t len; -+ -+ prb_rec_init_rd(&r, &info, text, sizeof(text)); -+ -+ *handover = false; -+ -+ if (!prb_read_valid(prb, con->seq, &r)) -+ return false; -+ -+ if (con->seq != r.info->seq) { -+ con->dropped += r.info->seq - con->seq; -+ con->seq = r.info->seq; -+ if (panic_in_progress() && panic_console_dropped++ > 10) { -+ suppress_panic_printk = 1; -+ pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); -+ } -+ } -+ -+ /* Skip record that has level above the console loglevel. 
*/ -+ if (suppress_message_printing(r.info->level)) { -+ con->seq++; -+ goto skip; -+ } -+ -+ if (con->flags & CON_EXTENDED) { -+ write_text = &ext_text[0]; -+ len = info_print_ext_header(ext_text, sizeof(ext_text), r.info); -+ len += msg_print_ext_body(ext_text + len, sizeof(ext_text) - len, -+ &r.text_buf[0], r.info->text_len, &r.info->dev_info); -+ } else { -+ write_text = &text[0]; -+ len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); -+ } -+ -+ /* -+ * While actively printing out messages, if another printk() -+ * were to occur on another CPU, it may wait for this one to -+ * finish. This task can not be preempted if there is a -+ * waiter waiting to take over. -+ * -+ * Interrupts are disabled because the hand over to a waiter -+ * must not be interrupted until the hand over is completed -+ * (@console_waiter is cleared). -+ */ -+ printk_safe_enter_irqsave(flags); -+ console_lock_spinning_enable(); -+ -+ stop_critical_timings(); /* don't trace print latency */ -+ call_console_driver(con, write_text, len); -+ start_critical_timings(); -+ -+ con->seq++; -+ -+ *handover = console_lock_spinning_disable_and_check(); -+ printk_safe_exit_irqrestore(flags); -+skip: -+ return true; -+} -+ -+/* -+ * Print out all remaining records to all consoles. -+ * -+ * @do_cond_resched is set by the caller. It can be true only in schedulable -+ * context. -+ * -+ * @next_seq is set to the sequence number after the last available record. -+ * The value is valid only when this function returns true. It means that all -+ * usable consoles are completely flushed. - * -- * Console drivers may assume that per-cpu resources have been allocated. So -- * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't -- * call them until this CPU is officially up. -+ * @handover will be set to true if a printk waiter has taken over the -+ * console_lock, in which case the caller is no longer holding the -+ * console_lock. 
Otherwise it is set to false. -+ * -+ * Returns true when there was at least one usable console and all messages -+ * were flushed to all usable consoles. A returned false informs the caller -+ * that everything was not flushed (either there were no usable consoles or -+ * another context has taken over printing or it is a panic situation and this -+ * is not the panic CPU). Regardless the reason, the caller should assume it -+ * is not useful to immediately try again. -+ * -+ * Requires the console_lock. - */ --static inline int can_use_console(void) -+static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover) - { -- return cpu_online(raw_smp_processor_id()) || have_callable_console(); -+ bool any_usable = false; -+ struct console *con; -+ bool any_progress; -+ -+ *next_seq = 0; -+ *handover = false; -+ -+ do { -+ any_progress = false; -+ -+ for_each_console(con) { -+ bool progress; -+ -+ if (!console_is_usable(con)) -+ continue; -+ any_usable = true; -+ -+ progress = console_emit_next_record(con, handover); -+ if (*handover) -+ return false; -+ -+ /* Track the next of the highest seq flushed. */ -+ if (con->seq > *next_seq) -+ *next_seq = con->seq; -+ -+ if (!progress) -+ continue; -+ any_progress = true; -+ -+ /* Allow panic_cpu to take over the consoles safely. */ -+ if (abandon_console_lock_in_panic()) -+ return false; -+ -+ if (do_cond_resched) -+ cond_resched(); -+ } -+ } while (any_progress); -+ -+ return any_usable; - } - - /** -@@ -2678,28 +2797,20 @@ static inline int can_use_console(void) - * by printk(). If this is the case, console_unlock(); emits - * the output prior to releasing the lock. - * -- * If there is output waiting, we wake /dev/kmsg and syslog() users. -- * - * console_unlock(); may be called from any context. 
- */ - void console_unlock(void) - { -- static char ext_text[CONSOLE_EXT_LOG_MAX]; -- static char text[CONSOLE_LOG_MAX]; -- static int panic_console_dropped; -- unsigned long flags; -- bool do_cond_resched, retry; -- struct printk_info info; -- struct printk_record r; -- u64 __maybe_unused next_seq; -+ bool do_cond_resched; -+ bool handover; -+ bool flushed; -+ u64 next_seq; - - if (console_suspended) { - up_console_sem(); - return; - } - -- prb_rec_init_rd(&r, &info, text, sizeof(text)); -- - /* - * Console drivers are called with interrupts disabled, so - * @console_may_schedule should be cleared before; however, we may -@@ -2708,125 +2819,34 @@ void console_unlock(void) - * between lines if allowable. Not doing so can cause a very long - * scheduling stall on a slow console leading to RCU stall and - * softlockup warnings which exacerbate the issue with more -- * messages practically incapacitating the system. -- * -- * console_trylock() is not able to detect the preemptive -- * context reliably. Therefore the value must be stored before -- * and cleared after the "again" goto label. -+ * messages practically incapacitating the system. Therefore, create -+ * a local to use for the printing loop. - */ - do_cond_resched = console_may_schedule; --again: -- console_may_schedule = 0; - -- /* -- * We released the console_sem lock, so we need to recheck if -- * cpu is online and (if not) is there at least one CON_ANYTIME -- * console. -- */ -- if (!can_use_console()) { -- console_locked = 0; -- up_console_sem(); -- return; -- } -- -- for (;;) { -- size_t ext_len = 0; -- int handover; -- size_t len; -- --skip: -- if (!prb_read_valid(prb, console_seq, &r)) -- break; -- -- if (console_seq != r.info->seq) { -- console_dropped += r.info->seq - console_seq; -- console_seq = r.info->seq; -- if (panic_in_progress() && panic_console_dropped++ > 10) { -- suppress_panic_printk = 1; -- pr_warn_once("Too many dropped messages. 
Suppress messages on non-panic CPUs to prevent livelock.\n"); -- } -- } -- -- if (suppress_message_printing(r.info->level)) { -- /* -- * Skip record we have buffered and already printed -- * directly to the console when we received it, and -- * record that has level above the console loglevel. -- */ -- console_seq++; -- goto skip; -- } -+ do { -+ console_may_schedule = 0; - -- /* Output to all consoles once old messages replayed. */ -- if (unlikely(exclusive_console && -- console_seq >= exclusive_console_stop_seq)) { -- exclusive_console = NULL; -- } -+ flushed = console_flush_all(do_cond_resched, &next_seq, &handover); -+ if (!handover) -+ __console_unlock(); - - /* -- * Handle extended console text first because later -- * record_print_text() will modify the record buffer in-place. -+ * Abort if there was a failure to flush all messages to all -+ * usable consoles. Either it is not possible to flush (in -+ * which case it would be an infinite loop of retrying) or -+ * another context has taken over printing. - */ -- if (nr_ext_console_drivers) { -- ext_len = info_print_ext_header(ext_text, -- sizeof(ext_text), -- r.info); -- ext_len += msg_print_ext_body(ext_text + ext_len, -- sizeof(ext_text) - ext_len, -- &r.text_buf[0], -- r.info->text_len, -- &r.info->dev_info); -- } -- len = record_print_text(&r, -- console_msg_format & MSG_FORMAT_SYSLOG, -- printk_time); -- console_seq++; -+ if (!flushed) -+ break; - - /* -- * While actively printing out messages, if another printk() -- * were to occur on another CPU, it may wait for this one to -- * finish. This task can not be preempted if there is a -- * waiter waiting to take over. -- * -- * Interrupts are disabled because the hand over to a waiter -- * must not be interrupted until the hand over is completed -- * (@console_waiter is cleared). -+ * Some context may have added new records after -+ * console_flush_all() but before unlocking the console. -+ * Re-check if there is a new record to flush. 
If the trylock -+ * fails, another context is already handling the printing. - */ -- printk_safe_enter_irqsave(flags); -- console_lock_spinning_enable(); -- -- stop_critical_timings(); /* don't trace print latency */ -- call_console_drivers(ext_text, ext_len, text, len); -- start_critical_timings(); -- -- handover = console_lock_spinning_disable_and_check(); -- printk_safe_exit_irqrestore(flags); -- if (handover) -- return; -- -- /* Allow panic_cpu to take over the consoles safely */ -- if (abandon_console_lock_in_panic()) -- break; -- -- if (do_cond_resched) -- cond_resched(); -- } -- -- /* Get consistent value of the next-to-be-used sequence number. */ -- next_seq = console_seq; -- -- console_locked = 0; -- up_console_sem(); -- -- /* -- * Someone could have filled up the buffer again, so re-check if there's -- * something to flush. In case we cannot trylock the console_sem again, -- * there's a new owner and the console_unlock() from them will do the -- * flush, no worries. -- */ -- retry = prb_read_valid(prb, next_seq, NULL); -- if (retry && !abandon_console_lock_in_panic() && console_trylock()) -- goto again; -+ } while (prb_read_valid(prb, next_seq, NULL) && console_trylock()); - } - EXPORT_SYMBOL(console_unlock); - -@@ -2886,8 +2906,14 @@ void console_flush_on_panic(enum con_flu - console_trylock(); - console_may_schedule = 0; - -- if (mode == CONSOLE_REPLAY_ALL) -- console_seq = prb_first_valid_seq(prb); -+ if (mode == CONSOLE_REPLAY_ALL) { -+ struct console *c; -+ u64 seq; -+ -+ seq = prb_first_valid_seq(prb); -+ for_each_console(c) -+ c->seq = seq; -+ } - console_unlock(); - } - -@@ -3127,26 +3153,15 @@ void register_console(struct console *ne - if (newcon->flags & CON_EXTENDED) - nr_ext_console_drivers++; - -+ newcon->dropped = 0; - if (newcon->flags & CON_PRINTBUFFER) { -- /* -- * console_unlock(); will print out the buffered messages -- * for us. -- * -- * We're about to replay the log buffer. 
Only do this to the -- * just-registered console to avoid excessive message spam to -- * the already-registered consoles. -- * -- * Set exclusive_console with disabled interrupts to reduce -- * race window with eventual console_flush_on_panic() that -- * ignores console_lock. -- */ -- exclusive_console = newcon; -- exclusive_console_stop_seq = console_seq; -- - /* Get a consistent copy of @syslog_seq. */ - mutex_lock(&syslog_lock); -- console_seq = syslog_seq; -+ newcon->seq = syslog_seq; - mutex_unlock(&syslog_lock); -+ } else { -+ /* Begin with next message. */ -+ newcon->seq = prb_next_seq(prb); - } - console_unlock(); - console_sysfs_notify(); diff --git a/debian/patches-rt/0009-ptrace-Admit-ptrace_stop-can-generate-spuriuos-SIGTR.patch b/debian/patches-rt/0009-ptrace-Admit-ptrace_stop-can-generate-spuriuos-SIGTR.patch deleted file mode 100644 index 9c8ce7771..000000000 --- a/debian/patches-rt/0009-ptrace-Admit-ptrace_stop-can-generate-spuriuos-SIGTR.patch +++ /dev/null @@ -1,198 +0,0 @@ -From: "Eric W. Biederman" <ebiederm@xmission.com> -Date: Thu, 5 May 2022 13:26:42 -0500 -Subject: [PATCH 09/12] ptrace: Admit ptrace_stop can generate spuriuos - SIGTRAPs -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Long ago and far away there was a BUG_ON at the start of ptrace_stop -that did "BUG_ON(!(current->ptrace & PT_PTRACED));" [1]. The BUG_ON -had never triggered but examination of the code showed that the BUG_ON -could actually trigger. To complement removing the BUG_ON an attempt -to better handle the race was added. - -The code detected the tracer had gone away and did not call -do_notify_parent_cldstop. The code also attempted to prevent -ptrace_report_syscall from sending spurious SIGTRAPs when the tracer -went away. - -The code to detect when the tracer had gone away before sending a -signal to tracer was a legitimate fix and continues to work to this -date. 
- -The code to prevent sending spurious SIGTRAPs is a failure. At the -time and until today the code only catches it when the tracer goes -away after siglock is dropped and before read_lock is acquired. If -the tracer goes away after read_lock is dropped a spurious SIGTRAP can -still be sent to the tracee. The tracer going away after read_lock -is dropped is the far likelier case as it is the bigger window. - -Given that the attempt to prevent the generation of a SIGTRAP was a -failure and continues to be a failure remove the code that attempts to -do that. This simplifies the code in ptrace_stop and makes -ptrace_stop much easier to reason about. - -To successfully deal with the tracer going away, all of the tracer's -instrumentation of the child would need to be removed, and reliably -detecting when the tracer has set a signal to continue with would need -to be implemented. - -[1] 66519f549ae5 ("[PATCH] fix ptracer death race yielding bogus BUG_ON") - -History-Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git -Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220505182645.497868-9-ebiederm@xmission.com ---- - kernel/signal.c | 92 +++++++++++++++++++++++--------------------------------- - 1 file changed, 38 insertions(+), 54 deletions(-) - ---- a/kernel/signal.c -+++ b/kernel/signal.c -@@ -2187,13 +2187,12 @@ static void do_notify_parent_cldstop(str - * with. If the code did not stop because the tracer is gone, - * the stop signal remains unchanged unless clear_code. 
- */ --static int ptrace_stop(int exit_code, int why, int clear_code, -- unsigned long message, kernel_siginfo_t *info) -+static int ptrace_stop(int exit_code, int why, unsigned long message, -+ kernel_siginfo_t *info) - __releases(¤t->sighand->siglock) - __acquires(¤t->sighand->siglock) - { - bool gstop_done = false; -- bool read_code = true; - - if (arch_ptrace_stop_needed()) { - /* -@@ -2212,7 +2211,14 @@ static int ptrace_stop(int exit_code, in - /* - * schedule() will not sleep if there is a pending signal that - * can awaken the task. -+ * -+ * After this point ptrace_signal_wake_up will clear TASK_TRACED -+ * if ptrace_unlink happens. Handle previous ptrace_unlinks -+ * here to prevent ptrace_stop sleeping in schedule. - */ -+ if (!current->ptrace) -+ return exit_code; -+ - set_special_state(TASK_TRACED); - - /* -@@ -2259,54 +2265,33 @@ static int ptrace_stop(int exit_code, in - - spin_unlock_irq(¤t->sighand->siglock); - read_lock(&tasklist_lock); -- if (likely(current->ptrace)) { -- /* -- * Notify parents of the stop. -- * -- * While ptraced, there are two parents - the ptracer and -- * the real_parent of the group_leader. The ptracer should -- * know about every stop while the real parent is only -- * interested in the completion of group stop. The states -- * for the two don't interact with each other. Notify -- * separately unless they're gonna be duplicates. -- */ -+ /* -+ * Notify parents of the stop. -+ * -+ * While ptraced, there are two parents - the ptracer and -+ * the real_parent of the group_leader. The ptracer should -+ * know about every stop while the real parent is only -+ * interested in the completion of group stop. The states -+ * for the two don't interact with each other. Notify -+ * separately unless they're gonna be duplicates. 
-+ */ -+ if (current->ptrace) - do_notify_parent_cldstop(current, true, why); -- if (gstop_done && ptrace_reparented(current)) -- do_notify_parent_cldstop(current, false, why); -- -- /* -- * Don't want to allow preemption here, because -- * sys_ptrace() needs this task to be inactive. -- * -- * XXX: implement read_unlock_no_resched(). -- */ -- preempt_disable(); -- read_unlock(&tasklist_lock); -- cgroup_enter_frozen(); -- preempt_enable_no_resched(); -- freezable_schedule(); -- cgroup_leave_frozen(true); -- } else { -- /* -- * By the time we got the lock, our tracer went away. -- * Don't drop the lock yet, another tracer may come. -- * -- * If @gstop_done, the ptracer went away between group stop -- * completion and here. During detach, it would have set -- * JOBCTL_STOP_PENDING on us and we'll re-enter -- * TASK_STOPPED in do_signal_stop() on return, so notifying -- * the real parent of the group stop completion is enough. -- */ -- if (gstop_done) -- do_notify_parent_cldstop(current, false, why); -+ if (gstop_done && (!current->ptrace || ptrace_reparented(current))) -+ do_notify_parent_cldstop(current, false, why); - -- /* tasklist protects us from ptrace_freeze_traced() */ -- __set_current_state(TASK_RUNNING); -- read_code = false; -- if (clear_code) -- exit_code = 0; -- read_unlock(&tasklist_lock); -- } -+ /* -+ * Don't want to allow preemption here, because -+ * sys_ptrace() needs this task to be inactive. -+ * -+ * XXX: implement read_unlock_no_resched(). -+ */ -+ preempt_disable(); -+ read_unlock(&tasklist_lock); -+ cgroup_enter_frozen(); -+ preempt_enable_no_resched(); -+ freezable_schedule(); -+ cgroup_leave_frozen(true); - - /* - * We are back. Now reacquire the siglock before touching -@@ -2314,8 +2299,7 @@ static int ptrace_stop(int exit_code, in - * any signal-sending on another CPU that wants to examine it. 
- */ - spin_lock_irq(¤t->sighand->siglock); -- if (read_code) -- exit_code = current->exit_code; -+ exit_code = current->exit_code; - current->last_siginfo = NULL; - current->ptrace_message = 0; - current->exit_code = 0; -@@ -2343,7 +2327,7 @@ static int ptrace_do_notify(int signr, i - info.si_uid = from_kuid_munged(current_user_ns(), current_uid()); - - /* Let the debugger run. */ -- return ptrace_stop(exit_code, why, 1, message, &info); -+ return ptrace_stop(exit_code, why, message, &info); - } - - int ptrace_notify(int exit_code, unsigned long message) -@@ -2515,7 +2499,7 @@ static void do_jobctl_trap(void) - CLD_STOPPED, 0); - } else { - WARN_ON_ONCE(!signr); -- ptrace_stop(signr, CLD_STOPPED, 0, 0, NULL); -+ ptrace_stop(signr, CLD_STOPPED, 0, NULL); - } - } - -@@ -2568,7 +2552,7 @@ static int ptrace_signal(int signr, kern - * comment in dequeue_signal(). - */ - current->jobctl |= JOBCTL_STOP_DEQUEUED; -- signr = ptrace_stop(signr, CLD_TRAPPED, 0, 0, info); -+ signr = ptrace_stop(signr, CLD_TRAPPED, 0, info); - - /* We're back. Did the debugger cancel the sig? */ - if (signr == 0) diff --git a/debian/patches-rt/0010-drm-i915-Drop-the-irqs_disabled-check.patch b/debian/patches-rt/0010-drm-i915-Drop-the-irqs_disabled-check.patch index 14e355c2f..9e62ebe72 100644 --- a/debian/patches-rt/0010-drm-i915-Drop-the-irqs_disabled-check.patch +++ b/debian/patches-rt/0010-drm-i915-Drop-the-irqs_disabled-check.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Fri, 1 Oct 2021 20:01:03 +0200 Subject: [PATCH 10/10] drm/i915: Drop the irqs_disabled() check -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz The !irqs_disabled() check triggers on PREEMPT_RT even with i915_sched_engine::lock acquired. 
The reason is the lock is transformed diff --git a/debian/patches-rt/0010-printk-move-buffer-definitions-into-console_emit_nex.patch b/debian/patches-rt/0010-printk-move-buffer-definitions-into-console_emit_nex.patch deleted file mode 100644 index c513dca7f..000000000 --- a/debian/patches-rt/0010-printk-move-buffer-definitions-into-console_emit_nex.patch +++ /dev/null @@ -1,184 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 20 Apr 2022 01:52:32 +0206 -Subject: [PATCH 10/18] printk: move buffer definitions into - console_emit_next_record() caller -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Extended consoles print extended messages and do not print messages about -dropped records. - -Non-extended consoles print "normal" messages as well as extra messages -about dropped records. - -Currently the buffers for these various message types are defined within -the functions that might use them and their usage is based upon the -CON_EXTENDED flag. This will be a problem when moving to kthread printers -because each printer must be able to provide its own buffers. - -Move all the message buffer definitions outside of -console_emit_next_record(). The caller knows if extended or dropped -messages should be printed and can specify the appropriate buffers to -use. The console_emit_next_record() and call_console_driver() functions -can know what to print based on whether specified buffers are non-NULL. - -With this change, buffer definition/allocation/specification is separated -from the code that does the various types of string printing. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220419234637.357112-11-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 60 +++++++++++++++++++++++++++++++++++-------------- - 1 file changed, 43 insertions(+), 17 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -394,6 +394,9 @@ static struct latched_seq clear_seq = { - /* the maximum size of a formatted record (i.e. with prefix added per line) */ - #define CONSOLE_LOG_MAX 1024 - -+/* the maximum size for a dropped text message */ -+#define DROPPED_TEXT_MAX 64 -+ - /* the maximum size allowed to be reserved for a record */ - #define LOG_LINE_MAX (CONSOLE_LOG_MAX - PREFIX_MAX) - -@@ -1923,18 +1926,18 @@ static int console_trylock_spinning(void - - /* - * Call the specified console driver, asking it to write out the specified -- * text and length. For non-extended consoles, if any records have been -+ * text and length. If @dropped_text is non-NULL and any records have been - * dropped, a dropped message will be written out first. 
- */ --static void call_console_driver(struct console *con, const char *text, size_t len) -+static void call_console_driver(struct console *con, const char *text, size_t len, -+ char *dropped_text) - { -- static char dropped_text[64]; - size_t dropped_len; - - trace_console_rcuidle(text, len); - -- if (con->dropped && !(con->flags & CON_EXTENDED)) { -- dropped_len = snprintf(dropped_text, sizeof(dropped_text), -+ if (con->dropped && dropped_text) { -+ dropped_len = snprintf(dropped_text, DROPPED_TEXT_MAX, - "** %lu printk messages dropped **\n", - con->dropped); - con->dropped = 0; -@@ -2296,6 +2299,7 @@ EXPORT_SYMBOL(_printk); - #else /* CONFIG_PRINTK */ - - #define CONSOLE_LOG_MAX 0 -+#define DROPPED_TEXT_MAX 0 - #define printk_time false - - #define prb_read_valid(rb, seq, r) false -@@ -2319,7 +2323,10 @@ static ssize_t msg_print_ext_body(char * - struct dev_printk_info *dev_info) { return 0; } - static void console_lock_spinning_enable(void) { } - static int console_lock_spinning_disable_and_check(void) { return 0; } --static void call_console_driver(struct console *con, const char *text, size_t len) { } -+static void call_console_driver(struct console *con, const char *text, size_t len, -+ char *dropped_text) -+{ -+} - static bool suppress_message_printing(int level) { return false; } - - #endif /* CONFIG_PRINTK */ -@@ -2644,6 +2651,14 @@ static void __console_unlock(void) - * Print one record for the given console. The record printed is whatever - * record is the next available record for the given console. - * -+ * @text is a buffer of size CONSOLE_LOG_MAX. -+ * -+ * If extended messages should be printed, @ext_text is a buffer of size -+ * CONSOLE_EXT_LOG_MAX. Otherwise @ext_text must be NULL. -+ * -+ * If dropped messages should be printed, @dropped_text is a buffer of size -+ * DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL. 
-+ * - * @handover will be set to true if a printk waiter has taken over the - * console_lock, in which case the caller is no longer holding the - * console_lock. Otherwise it is set to false. -@@ -2653,10 +2668,9 @@ static void __console_unlock(void) - * - * Requires the console_lock. - */ --static bool console_emit_next_record(struct console *con, bool *handover) -+static bool console_emit_next_record(struct console *con, char *text, char *ext_text, -+ char *dropped_text, bool *handover) - { -- static char ext_text[CONSOLE_EXT_LOG_MAX]; -- static char text[CONSOLE_LOG_MAX]; - static int panic_console_dropped; - struct printk_info info; - struct printk_record r; -@@ -2664,7 +2678,7 @@ static bool console_emit_next_record(str - char *write_text; - size_t len; - -- prb_rec_init_rd(&r, &info, text, sizeof(text)); -+ prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); - - *handover = false; - -@@ -2686,13 +2700,13 @@ static bool console_emit_next_record(str - goto skip; - } - -- if (con->flags & CON_EXTENDED) { -- write_text = &ext_text[0]; -- len = info_print_ext_header(ext_text, sizeof(ext_text), r.info); -- len += msg_print_ext_body(ext_text + len, sizeof(ext_text) - len, -+ if (ext_text) { -+ write_text = ext_text; -+ len = info_print_ext_header(ext_text, CONSOLE_EXT_LOG_MAX, r.info); -+ len += msg_print_ext_body(ext_text + len, CONSOLE_EXT_LOG_MAX - len, - &r.text_buf[0], r.info->text_len, &r.info->dev_info); - } else { -- write_text = &text[0]; -+ write_text = text; - len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); - } - -@@ -2710,7 +2724,7 @@ static bool console_emit_next_record(str - console_lock_spinning_enable(); - - stop_critical_timings(); /* don't trace print latency */ -- call_console_driver(con, write_text, len); -+ call_console_driver(con, write_text, len, dropped_text); - start_critical_timings(); - - con->seq++; -@@ -2746,6 +2760,9 @@ static bool console_emit_next_record(str - */ - static bool console_flush_all(bool 
do_cond_resched, u64 *next_seq, bool *handover) - { -+ static char dropped_text[DROPPED_TEXT_MAX]; -+ static char ext_text[CONSOLE_EXT_LOG_MAX]; -+ static char text[CONSOLE_LOG_MAX]; - bool any_usable = false; - struct console *con; - bool any_progress; -@@ -2763,7 +2780,16 @@ static bool console_flush_all(bool do_co - continue; - any_usable = true; - -- progress = console_emit_next_record(con, handover); -+ if (con->flags & CON_EXTENDED) { -+ /* Extended consoles do not print "dropped messages". */ -+ progress = console_emit_next_record(con, &text[0], -+ &ext_text[0], NULL, -+ handover); -+ } else { -+ progress = console_emit_next_record(con, &text[0], -+ NULL, &dropped_text[0], -+ handover); -+ } - if (*handover) - return false; - diff --git a/debian/patches-rt/0010-ptrace-Don-t-change-__state.patch b/debian/patches-rt/0010-ptrace-Don-t-change-__state.patch deleted file mode 100644 index dd6016d8c..000000000 --- a/debian/patches-rt/0010-ptrace-Don-t-change-__state.patch +++ /dev/null @@ -1,183 +0,0 @@ -From: "Eric W. Biederman" <ebiederm@xmission.com> -Date: Thu, 5 May 2022 13:26:43 -0500 -Subject: [PATCH 10/12] ptrace: Don't change __state -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Stop playing with tsk->__state to remove TASK_WAKEKILL while a ptrace -command is executing. - -Instead remove TASK_WAKEKILL from the definition of TASK_TRACED, and -implement a new jobctl flag TASK_PTRACE_FROZEN. This new flag is set -in jobctl_freeze_task and cleared when ptrace_stop is awoken or in -jobctl_unfreeze_task (when ptrace_stop remains asleep). - -In signal_wake_up add __TASK_TRACED to state along with TASK_WAKEKILL -when the wake up is for a fatal signal. Skip adding __TASK_TRACED -when TASK_PTRACE_FROZEN is not set. This has the same effect as -changing TASK_TRACED to __TASK_TRACED as all of the wake_ups that use -TASK_KILLABLE go through signal_wake_up. 
- -Handle a ptrace_stop being called with a pending fatal signal. -Previously it would have been handled by schedule simply failing to -sleep. As TASK_WAKEKILL is no longer part of TASK_TRACED schedule -will sleep with a fatal_signal_pending. The code in signal_wake_up -guarantees that the code will be awaked by any fatal signal that -codes after TASK_TRACED is set. - -Previously the __state value of __TASK_TRACED was changed to -TASK_RUNNING when woken up or back to TASK_TRACED when the code was -left in ptrace_stop. Now when woken up ptrace_stop now clears -JOBCTL_PTRACE_FROZEN and when left sleeping ptrace_unfreezed_traced -clears JOBCTL_PTRACE_FROZEN. - -Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220505182645.497868-10-ebiederm@xmission.com ---- - include/linux/sched.h | 2 +- - include/linux/sched/jobctl.h | 2 ++ - include/linux/sched/signal.h | 5 +++-- - kernel/ptrace.c | 21 ++++++++------------- - kernel/sched/core.c | 5 +---- - kernel/signal.c | 14 ++++++-------- - 6 files changed, 21 insertions(+), 28 deletions(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -103,7 +103,7 @@ struct task_group; - /* Convenience macros for the sake of set_current_state: */ - #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) - #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) --#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) -+#define TASK_TRACED __TASK_TRACED - - #define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) - ---- a/include/linux/sched/jobctl.h -+++ b/include/linux/sched/jobctl.h -@@ -19,6 +19,7 @@ struct task_struct; - #define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */ - #define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */ - #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ -+#define JOBCTL_PTRACE_FROZEN_BIT 24 /* frozen for ptrace */ - - #define JOBCTL_STOP_DEQUEUED (1UL << 
JOBCTL_STOP_DEQUEUED_BIT) - #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) -@@ -28,6 +29,7 @@ struct task_struct; - #define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT) - #define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT) - #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) -+#define JOBCTL_PTRACE_FROZEN (1UL << JOBCTL_PTRACE_FROZEN_BIT) - - #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) - #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) ---- a/include/linux/sched/signal.h -+++ b/include/linux/sched/signal.h -@@ -435,9 +435,10 @@ extern void calculate_sigpending(void); - - extern void signal_wake_up_state(struct task_struct *t, unsigned int state); - --static inline void signal_wake_up(struct task_struct *t, bool resume) -+static inline void signal_wake_up(struct task_struct *t, bool fatal) - { -- signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0); -+ fatal = fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN); -+ signal_wake_up_state(t, fatal ? TASK_WAKEKILL | __TASK_TRACED : 0); - } - static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) - { ---- a/kernel/ptrace.c -+++ b/kernel/ptrace.c -@@ -197,7 +197,7 @@ static bool ptrace_freeze_traced(struct - spin_lock_irq(&task->sighand->siglock); - if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && - !__fatal_signal_pending(task)) { -- WRITE_ONCE(task->__state, __TASK_TRACED); -+ task->jobctl |= JOBCTL_PTRACE_FROZEN; - ret = true; - } - spin_unlock_irq(&task->sighand->siglock); -@@ -207,23 +207,19 @@ static bool ptrace_freeze_traced(struct - - static void ptrace_unfreeze_traced(struct task_struct *task) - { -- if (READ_ONCE(task->__state) != __TASK_TRACED) -- return; -- -- WARN_ON(!task->ptrace || task->parent != current); -+ unsigned long flags; - - /* -- * PTRACE_LISTEN can allow ptrace_trap_notify to wake us up remotely. -- * Recheck state under the lock to close this race. 
-+ * The child may be awake and may have cleared -+ * JOBCTL_PTRACE_FROZEN (see ptrace_resume). The child will -+ * not set JOBCTL_PTRACE_FROZEN or enter __TASK_TRACED anew. - */ -- spin_lock_irq(&task->sighand->siglock); -- if (READ_ONCE(task->__state) == __TASK_TRACED) { -+ if (lock_task_sighand(task, &flags)) { -+ task->jobctl &= ~JOBCTL_PTRACE_FROZEN; - if (__fatal_signal_pending(task)) - wake_up_state(task, __TASK_TRACED); -- else -- WRITE_ONCE(task->__state, TASK_TRACED); -+ unlock_task_sighand(task, &flags); - } -- spin_unlock_irq(&task->sighand->siglock); - } - - /** -@@ -256,7 +252,6 @@ static int ptrace_check_attach(struct ta - */ - read_lock(&tasklist_lock); - if (child->ptrace && child->parent == current) { -- WARN_ON(READ_ONCE(child->__state) == __TASK_TRACED); - /* - * child->sighand can't be NULL, release_task() - * does ptrace_unlink() before __exit_signal(). ---- a/kernel/sched/core.c -+++ b/kernel/sched/core.c -@@ -6313,10 +6313,7 @@ static void __sched notrace __schedule(u - - /* - * We must load prev->state once (task_struct::state is volatile), such -- * that: -- * -- * - we form a control dependency vs deactivate_task() below. -- * - ptrace_{,un}freeze_traced() can change ->state underneath us. -+ * that we form a control dependency vs deactivate_task() below. - */ - prev_state = READ_ONCE(prev->__state); - if (!(sched_mode & SM_MASK_PREEMPT) && prev_state) { ---- a/kernel/signal.c -+++ b/kernel/signal.c -@@ -2209,14 +2209,12 @@ static int ptrace_stop(int exit_code, in - } - - /* -- * schedule() will not sleep if there is a pending signal that -- * can awaken the task. -- * -- * After this point ptrace_signal_wake_up will clear TASK_TRACED -- * if ptrace_unlink happens. Handle previous ptrace_unlinks -- * here to prevent ptrace_stop sleeping in schedule. -+ * After this point ptrace_signal_wake_up or signal_wake_up -+ * will clear TASK_TRACED if ptrace_unlink happens or a fatal -+ * signal comes in. 
Handle previous ptrace_unlinks and fatal -+ * signals here to prevent ptrace_stop sleeping in schedule. - */ -- if (!current->ptrace) -+ if (!current->ptrace || __fatal_signal_pending(current)) - return exit_code; - - set_special_state(TASK_TRACED); -@@ -2305,7 +2303,7 @@ static int ptrace_stop(int exit_code, in - current->exit_code = 0; - - /* LISTENING can be set only during STOP traps, clear it */ -- current->jobctl &= ~JOBCTL_LISTENING; -+ current->jobctl &= ~(JOBCTL_LISTENING | JOBCTL_PTRACE_FROZEN); - - /* - * Queued signals ignored us while we were stopped for tracing. diff --git a/debian/patches-rt/0011-printk-add-pr_flush.patch b/debian/patches-rt/0011-printk-add-pr_flush.patch deleted file mode 100644 index 265ad5e00..000000000 --- a/debian/patches-rt/0011-printk-add-pr_flush.patch +++ /dev/null @@ -1,185 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 20 Apr 2022 01:52:33 +0206 -Subject: [PATCH 11/18] printk: add pr_flush() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Provide a might-sleep function to allow waiting for console printers -to catch up to the latest logged message. - -Use pr_flush() whenever it is desirable to get buffered messages -printed before continuing: suspend_console(), resume_console(), -console_stop(), console_start(), console_unblank(). 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220419234637.357112-12-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/printk.h | 7 ++++ - kernel/printk/printk.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++ - 2 files changed, 90 insertions(+) - ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -170,6 +170,8 @@ extern void __printk_safe_exit(void); - #define printk_deferred_enter __printk_safe_enter - #define printk_deferred_exit __printk_safe_exit - -+extern bool pr_flush(int timeout_ms, bool reset_on_progress); -+ - /* - * Please don't use printk_ratelimit(), because it shares ratelimiting state - * with all other unrelated printk_ratelimit() callsites. Instead use -@@ -220,6 +222,11 @@ static inline void printk_deferred_exit( - { - } - -+static inline bool pr_flush(int timeout_ms, bool reset_on_progress) -+{ -+ return true; -+} -+ - static inline int printk_ratelimit(void) - { - return 0; ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -2296,6 +2296,8 @@ asmlinkage __visible int _printk(const c - } - EXPORT_SYMBOL(_printk); - -+static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); -+ - #else /* CONFIG_PRINTK */ - - #define CONSOLE_LOG_MAX 0 -@@ -2328,6 +2330,7 @@ static void call_console_driver(struct c - { - } - static bool suppress_message_printing(int level) { return false; } -+static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } - - #endif /* CONFIG_PRINTK */ - -@@ -2515,6 +2518,7 @@ void suspend_console(void) - if (!console_suspend_enabled) - return; - pr_info("Suspending console(s) (use no_console_suspend to debug)\n"); -+ pr_flush(1000, true); - console_lock(); - console_suspended = 1; - up_console_sem(); -@@ -2527,6 +2531,7 @@ void resume_console(void) - 
down_console_sem(); - console_suspended = 0; - console_unlock(); -+ pr_flush(1000, true); - } - - /** -@@ -2912,6 +2917,9 @@ void console_unblank(void) - if ((c->flags & CON_ENABLED) && c->unblank) - c->unblank(); - console_unlock(); -+ -+ if (!oops_in_progress) -+ pr_flush(1000, true); - } - - /** -@@ -2970,6 +2978,7 @@ struct tty_driver *console_device(int *i - */ - void console_stop(struct console *console) - { -+ __pr_flush(console, 1000, true); - console_lock(); - console->flags &= ~CON_ENABLED; - console_unlock(); -@@ -2981,6 +2990,7 @@ void console_start(struct console *conso - console_lock(); - console->flags |= CON_ENABLED; - console_unlock(); -+ __pr_flush(console, 1000, true); - } - EXPORT_SYMBOL(console_start); - -@@ -3352,6 +3362,79 @@ static int __init printk_late_init(void) - late_initcall(printk_late_init); - - #if defined CONFIG_PRINTK -+/* If @con is specified, only wait for that console. Otherwise wait for all. */ -+static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) -+{ -+ int remaining = timeout_ms; -+ struct console *c; -+ u64 last_diff = 0; -+ u64 printk_seq; -+ u64 diff; -+ u64 seq; -+ -+ might_sleep(); -+ -+ seq = prb_next_seq(prb); -+ -+ for (;;) { -+ diff = 0; -+ -+ console_lock(); -+ for_each_console(c) { -+ if (con && con != c) -+ continue; -+ if (!console_is_usable(c)) -+ continue; -+ printk_seq = c->seq; -+ if (printk_seq < seq) -+ diff += seq - printk_seq; -+ } -+ console_unlock(); -+ -+ if (diff != last_diff && reset_on_progress) -+ remaining = timeout_ms; -+ -+ if (diff == 0 || remaining == 0) -+ break; -+ -+ if (remaining < 0) { -+ /* no timeout limit */ -+ msleep(100); -+ } else if (remaining < 100) { -+ msleep(remaining); -+ remaining = 0; -+ } else { -+ msleep(100); -+ remaining -= 100; -+ } -+ -+ last_diff = diff; -+ } -+ -+ return (diff == 0); -+} -+ -+/** -+ * pr_flush() - Wait for printing threads to catch up. -+ * -+ * @timeout_ms: The maximum time (in ms) to wait. 
-+ * @reset_on_progress: Reset the timeout if forward progress is seen. -+ * -+ * A value of 0 for @timeout_ms means no waiting will occur. A value of -1 -+ * represents infinite waiting. -+ * -+ * If @reset_on_progress is true, the timeout will be reset whenever any -+ * printer has been seen to make some forward progress. -+ * -+ * Context: Process context. May sleep while acquiring console lock. -+ * Return: true if all enabled printers are caught up. -+ */ -+bool pr_flush(int timeout_ms, bool reset_on_progress) -+{ -+ return __pr_flush(NULL, timeout_ms, reset_on_progress); -+} -+EXPORT_SYMBOL(pr_flush); -+ - /* - * Delayed printk version, for scheduler-internal messages: - */ diff --git a/debian/patches-rt/0011-ptrace-Always-take-siglock-in-ptrace_resume.patch b/debian/patches-rt/0011-ptrace-Always-take-siglock-in-ptrace_resume.patch deleted file mode 100644 index d57cbd42a..000000000 --- a/debian/patches-rt/0011-ptrace-Always-take-siglock-in-ptrace_resume.patch +++ /dev/null @@ -1,47 +0,0 @@ -From: "Eric W. Biederman" <ebiederm@xmission.com> -Date: Thu, 5 May 2022 13:26:44 -0500 -Subject: [PATCH 11/12] ptrace: Always take siglock in ptrace_resume -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Make code analysis simpler and future changes easier by -always taking siglock in ptrace_resume. - -Signed-off-by: "Eric W. 
Biederman" <ebiederm@xmission.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220505182645.497868-11-ebiederm@xmission.com ---- - kernel/ptrace.c | 13 ++----------- - 1 file changed, 2 insertions(+), 11 deletions(-) - ---- a/kernel/ptrace.c -+++ b/kernel/ptrace.c -@@ -837,8 +837,6 @@ static long ptrace_get_rseq_configuratio - static int ptrace_resume(struct task_struct *child, long request, - unsigned long data) - { -- bool need_siglock; -- - if (!valid_signal(data)) - return -EIO; - -@@ -874,18 +872,11 @@ static int ptrace_resume(struct task_str - * Note that we need siglock even if ->exit_code == data and/or this - * status was not reported yet, the new status must not be cleared by - * wait_task_stopped() after resume. -- * -- * If data == 0 we do not care if wait_task_stopped() reports the old -- * status and clears the code too; this can't race with the tracee, it -- * takes siglock after resume. - */ -- need_siglock = data && !thread_group_empty(current); -- if (need_siglock) -- spin_lock_irq(&child->sighand->siglock); -+ spin_lock_irq(&child->sighand->siglock); - child->exit_code = data; - wake_up_state(child, __TASK_TRACED); -- if (need_siglock) -- spin_unlock_irq(&child->sighand->siglock); -+ spin_unlock_irq(&child->sighand->siglock); - - return 0; - } diff --git a/debian/patches-rt/0012-printk-add-functions-to-prefer-direct-printing.patch b/debian/patches-rt/0012-printk-add-functions-to-prefer-direct-printing.patch deleted file mode 100644 index d30f90707..000000000 --- a/debian/patches-rt/0012-printk-add-functions-to-prefer-direct-printing.patch +++ /dev/null @@ -1,331 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 20 Apr 2022 01:52:34 +0206 -Subject: [PATCH 12/18] printk: add functions to prefer direct printing -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Once kthread printing is available, console printing will no longer 
-occur in the context of the printk caller. However, there are some -special contexts where it is desirable for the printk caller to -directly print out kernel messages. Using pr_flush() to wait for -threaded printers is only possible if the caller is in a sleepable -context and the kthreads are active. That is not always the case. - -Introduce printk_prefer_direct_enter() and printk_prefer_direct_exit() -functions to explicitly (and globally) activate/deactivate preferred -direct console printing. The term "direct console printing" refers to -printing to all enabled consoles from the context of the printk -caller. The term "prefer" is used because this type of printing is -only best effort. If the console is currently locked or other -printers are already actively printing, the printk caller will need -to rely on the other contexts to handle the printing. - -This preferred direct printing is how all printing has been handled -until now (unless it was explicitly deferred). - -When kthread printing is introduced, there may be some unanticipated -problems due to kthreads being unable to flush important messages. -In order to minimize such risks, preferred direct printing is -activated for the primary important messages when the system -experiences general types of major errors. These are: - - - emergency reboot/shutdown - - cpu and rcu stalls - - hard and soft lockups - - hung tasks - - warn - - sysrq - -Note that since kthread printing does not yet exist, no behavior -changes result from this commit. This is only implementing the -counter and marking the various places where preferred direct -printing is active. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Acked-by: Paul E. 
McKenney <paulmck@kernel.org> # for RCU -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220419234637.357112-13-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - drivers/tty/sysrq.c | 2 ++ - include/linux/printk.h | 11 +++++++++++ - kernel/hung_task.c | 11 ++++++++++- - kernel/panic.c | 4 ++++ - kernel/printk/printk.c | 28 ++++++++++++++++++++++++++++ - kernel/rcu/tree_stall.h | 2 ++ - kernel/reboot.c | 14 +++++++++++++- - kernel/watchdog.c | 4 ++++ - kernel/watchdog_hld.c | 4 ++++ - 9 files changed, 78 insertions(+), 2 deletions(-) - ---- a/drivers/tty/sysrq.c -+++ b/drivers/tty/sysrq.c -@@ -578,6 +578,7 @@ void __handle_sysrq(int key, bool check_ - - rcu_sysrq_start(); - rcu_read_lock(); -+ printk_prefer_direct_enter(); - /* - * Raise the apparent loglevel to maximum so that the sysrq header - * is shown to provide the user with positive feedback. We do not -@@ -619,6 +620,7 @@ void __handle_sysrq(int key, bool check_ - pr_cont("\n"); - console_loglevel = orig_log_level; - } -+ printk_prefer_direct_exit(); - rcu_read_unlock(); - rcu_sysrq_end(); - ---- a/include/linux/printk.h -+++ b/include/linux/printk.h -@@ -170,6 +170,9 @@ extern void __printk_safe_exit(void); - #define printk_deferred_enter __printk_safe_enter - #define printk_deferred_exit __printk_safe_exit - -+extern void printk_prefer_direct_enter(void); -+extern void printk_prefer_direct_exit(void); -+ - extern bool pr_flush(int timeout_ms, bool reset_on_progress); - - /* -@@ -222,6 +225,14 @@ static inline void printk_deferred_exit( - { - } - -+static inline void printk_prefer_direct_enter(void) -+{ -+} -+ -+static inline void printk_prefer_direct_exit(void) -+{ -+} -+ - static inline bool pr_flush(int timeout_ms, bool reset_on_progress) - { - return true; ---- a/kernel/hung_task.c -+++ b/kernel/hung_task.c -@@ -127,6 +127,8 @@ static void check_hung_task(struct task_ - * complain: - */ - if 
(sysctl_hung_task_warnings) { -+ printk_prefer_direct_enter(); -+ - if (sysctl_hung_task_warnings > 0) - sysctl_hung_task_warnings--; - pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", -@@ -142,6 +144,8 @@ static void check_hung_task(struct task_ - - if (sysctl_hung_task_all_cpu_backtrace) - hung_task_show_all_bt = true; -+ -+ printk_prefer_direct_exit(); - } - - touch_nmi_watchdog(); -@@ -204,12 +208,17 @@ static void check_hung_uninterruptible_t - } - unlock: - rcu_read_unlock(); -- if (hung_task_show_lock) -+ if (hung_task_show_lock) { -+ printk_prefer_direct_enter(); - debug_show_all_locks(); -+ printk_prefer_direct_exit(); -+ } - - if (hung_task_show_all_bt) { - hung_task_show_all_bt = false; -+ printk_prefer_direct_enter(); - trigger_all_cpu_backtrace(); -+ printk_prefer_direct_exit(); - } - - if (hung_task_call_panic) ---- a/kernel/panic.c -+++ b/kernel/panic.c -@@ -579,6 +579,8 @@ void __warn(const char *file, int line, - { - disable_trace_on_warning(); - -+ printk_prefer_direct_enter(); -+ - if (file) - pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", - raw_smp_processor_id(), current->pid, file, line, -@@ -608,6 +610,8 @@ void __warn(const char *file, int line, - - /* Just a warning, don't kill lockdep. */ - add_taint(taint, LOCKDEP_STILL_OK); -+ -+ printk_prefer_direct_exit(); - } - - #ifndef __WARN_FLAGS ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -362,6 +362,34 @@ static int console_msg_format = MSG_FORM - static DEFINE_MUTEX(syslog_lock); - - #ifdef CONFIG_PRINTK -+static atomic_t printk_prefer_direct = ATOMIC_INIT(0); -+ -+/** -+ * printk_prefer_direct_enter - cause printk() calls to attempt direct -+ * printing to all enabled consoles -+ * -+ * Since it is not possible to call into the console printing code from any -+ * context, there is no guarantee that direct printing will occur. -+ * -+ * This globally effects all printk() callers. -+ * -+ * Context: Any context. 
-+ */ -+void printk_prefer_direct_enter(void) -+{ -+ atomic_inc(&printk_prefer_direct); -+} -+ -+/** -+ * printk_prefer_direct_exit - restore printk() behavior -+ * -+ * Context: Any context. -+ */ -+void printk_prefer_direct_exit(void) -+{ -+ WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0); -+} -+ - DECLARE_WAIT_QUEUE_HEAD(log_wait); - /* All 3 protected by @syslog_lock. */ - /* the next printk record to read by syslog(READ) or /proc/kmsg */ ---- a/kernel/rcu/tree_stall.h -+++ b/kernel/rcu/tree_stall.h -@@ -619,6 +619,7 @@ static void print_cpu_stall(unsigned lon - * See Documentation/RCU/stallwarn.rst for info on how to debug - * RCU CPU stall warnings. - */ -+ printk_prefer_direct_enter(); - trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected")); - pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name); - raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); -@@ -656,6 +657,7 @@ static void print_cpu_stall(unsigned lon - */ - set_tsk_need_resched(current); - set_preempt_need_resched(); -+ printk_prefer_direct_exit(); - } - - static void check_cpu_stall(struct rcu_data *rdp) ---- a/kernel/reboot.c -+++ b/kernel/reboot.c -@@ -447,9 +447,11 @@ static int __orderly_reboot(void) - ret = run_cmd(reboot_cmd); - - if (ret) { -+ printk_prefer_direct_enter(); - pr_warn("Failed to start orderly reboot: forcing the issue\n"); - emergency_sync(); - kernel_restart(NULL); -+ printk_prefer_direct_exit(); - } - - return ret; -@@ -462,6 +464,7 @@ static int __orderly_poweroff(bool force - ret = run_cmd(poweroff_cmd); - - if (ret && force) { -+ printk_prefer_direct_enter(); - pr_warn("Failed to start orderly shutdown: forcing the issue\n"); - - /* -@@ -471,6 +474,7 @@ static int __orderly_poweroff(bool force - */ - emergency_sync(); - kernel_power_off(); -+ printk_prefer_direct_exit(); - } - - return ret; -@@ -528,6 +532,8 @@ EXPORT_SYMBOL_GPL(orderly_reboot); - */ - static void hw_failure_emergency_poweroff_func(struct work_struct *work) - { -+ 
printk_prefer_direct_enter(); -+ - /* - * We have reached here after the emergency shutdown waiting period has - * expired. This means orderly_poweroff has not been able to shut off -@@ -544,6 +550,8 @@ static void hw_failure_emergency_powerof - */ - pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n"); - emergency_restart(); -+ -+ printk_prefer_direct_exit(); - } - - static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work, -@@ -582,11 +590,13 @@ void hw_protection_shutdown(const char * - { - static atomic_t allow_proceed = ATOMIC_INIT(1); - -+ printk_prefer_direct_enter(); -+ - pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason); - - /* Shutdown should be initiated only once. */ - if (!atomic_dec_and_test(&allow_proceed)) -- return; -+ goto out; - - /* - * Queue a backup emergency shutdown in the event of -@@ -594,6 +604,8 @@ void hw_protection_shutdown(const char * - */ - hw_failure_emergency_poweroff(ms_until_forced); - orderly_poweroff(true); -+out: -+ printk_prefer_direct_exit(); - } - EXPORT_SYMBOL_GPL(hw_protection_shutdown); - ---- a/kernel/watchdog.c -+++ b/kernel/watchdog.c -@@ -424,6 +424,8 @@ static enum hrtimer_restart watchdog_tim - /* Start period for the next softlockup warning. */ - update_report_ts(); - -+ printk_prefer_direct_enter(); -+ - pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! 
[%s:%d]\n", - smp_processor_id(), duration, - current->comm, task_pid_nr(current)); -@@ -442,6 +444,8 @@ static enum hrtimer_restart watchdog_tim - add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); - if (softlockup_panic) - panic("softlockup: hung tasks"); -+ -+ printk_prefer_direct_exit(); - } - - return HRTIMER_RESTART; ---- a/kernel/watchdog_hld.c -+++ b/kernel/watchdog_hld.c -@@ -135,6 +135,8 @@ static void watchdog_overflow_callback(s - if (__this_cpu_read(hard_watchdog_warn) == true) - return; - -+ printk_prefer_direct_enter(); -+ - pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", - this_cpu); - print_modules(); -@@ -155,6 +157,8 @@ static void watchdog_overflow_callback(s - if (hardlockup_panic) - nmi_panic(regs, "Hard LOCKUP"); - -+ printk_prefer_direct_exit(); -+ - __this_cpu_write(hard_watchdog_warn, true); - return; - } diff --git a/debian/patches-rt/0012-sched-signal-ptrace-Rework-TASK_TRACED-TASK_STOPPED-.patch b/debian/patches-rt/0012-sched-signal-ptrace-Rework-TASK_TRACED-TASK_STOPPED-.patch deleted file mode 100644 index 932ad0003..000000000 --- a/debian/patches-rt/0012-sched-signal-ptrace-Rework-TASK_TRACED-TASK_STOPPED-.patch +++ /dev/null @@ -1,213 +0,0 @@ -From: Peter Zijlstra <peterz@infradead.org> -Date: Thu, 5 May 2022 13:26:45 -0500 -Subject: [PATCH 12/12] sched,signal,ptrace: Rework TASK_TRACED, TASK_STOPPED - state -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Currently ptrace_stop() / do_signal_stop() rely on the special states -TASK_TRACED and TASK_STOPPED resp. to keep unique state. That is, this -state exists only in task->__state and nowhere else. - -There's two spots of bother with this: - - - PREEMPT_RT has task->saved_state which complicates matters, - meaning task_is_{traced,stopped}() needs to check an additional - variable. 
- - - An alternative freezer implementation that itself relies on a - special TASK state would loose TASK_TRACED/TASK_STOPPED and will - result in misbehaviour. - -As such, add additional state to task->jobctl to track this state -outside of task->__state. - -NOTE: this doesn't actually fix anything yet, just adds extra state. - ---EWB - * didn't add a unnecessary newline in signal.h - * Update t->jobctl in signal_wake_up and ptrace_signal_wake_up - instead of in signal_wake_up_state. This prevents the clearing - of TASK_STOPPED and TASK_TRACED from getting lost. - * Added warnings if JOBCTL_STOPPED or JOBCTL_TRACED are not cleared - -Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> -Link: https://lkml.kernel.org/r/20220421150654.757693825@infradead.org -Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220505182645.497868-12-ebiederm@xmission.com ---- - include/linux/sched.h | 8 +++----- - include/linux/sched/jobctl.h | 6 ++++++ - include/linux/sched/signal.h | 19 +++++++++++++++---- - kernel/ptrace.c | 16 +++++++++++++--- - kernel/signal.c | 10 ++++++++-- - 5 files changed, 45 insertions(+), 14 deletions(-) - ---- a/include/linux/sched.h -+++ b/include/linux/sched.h -@@ -118,11 +118,9 @@ struct task_group; - - #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) - --#define task_is_traced(task) ((READ_ONCE(task->__state) & __TASK_TRACED) != 0) -- --#define task_is_stopped(task) ((READ_ONCE(task->__state) & __TASK_STOPPED) != 0) -- --#define task_is_stopped_or_traced(task) ((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0) -+#define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0) -+#define task_is_stopped(task) ((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0) -+#define task_is_stopped_or_traced(task) ((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0) - - /* - * 
Special states are those that do not use the normal wait-loop pattern. See ---- a/include/linux/sched/jobctl.h -+++ b/include/linux/sched/jobctl.h -@@ -21,6 +21,9 @@ struct task_struct; - #define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */ - #define JOBCTL_PTRACE_FROZEN_BIT 24 /* frozen for ptrace */ - -+#define JOBCTL_STOPPED_BIT 26 /* do_signal_stop() */ -+#define JOBCTL_TRACED_BIT 27 /* ptrace_stop() */ -+ - #define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT) - #define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT) - #define JOBCTL_STOP_CONSUME (1UL << JOBCTL_STOP_CONSUME_BIT) -@@ -31,6 +34,9 @@ struct task_struct; - #define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT) - #define JOBCTL_PTRACE_FROZEN (1UL << JOBCTL_PTRACE_FROZEN_BIT) - -+#define JOBCTL_STOPPED (1UL << JOBCTL_STOPPED_BIT) -+#define JOBCTL_TRACED (1UL << JOBCTL_TRACED_BIT) -+ - #define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY) - #define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK) - ---- a/include/linux/sched/signal.h -+++ b/include/linux/sched/signal.h -@@ -294,8 +294,10 @@ static inline int kernel_dequeue_signal( - static inline void kernel_signal_stop(void) - { - spin_lock_irq(¤t->sighand->siglock); -- if (current->jobctl & JOBCTL_STOP_DEQUEUED) -+ if (current->jobctl & JOBCTL_STOP_DEQUEUED) { -+ current->jobctl |= JOBCTL_STOPPED; - set_special_state(TASK_STOPPED); -+ } - spin_unlock_irq(¤t->sighand->siglock); - - schedule(); -@@ -437,12 +439,21 @@ extern void signal_wake_up_state(struct - - static inline void signal_wake_up(struct task_struct *t, bool fatal) - { -- fatal = fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN); -- signal_wake_up_state(t, fatal ? 
TASK_WAKEKILL | __TASK_TRACED : 0); -+ unsigned int state = 0; -+ if (fatal && !(t->jobctl & JOBCTL_PTRACE_FROZEN)) { -+ t->jobctl &= ~(JOBCTL_STOPPED | JOBCTL_TRACED); -+ state = TASK_WAKEKILL | __TASK_TRACED; -+ } -+ signal_wake_up_state(t, state); - } - static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume) - { -- signal_wake_up_state(t, resume ? __TASK_TRACED : 0); -+ unsigned int state = 0; -+ if (resume) { -+ t->jobctl &= ~JOBCTL_TRACED; -+ state = __TASK_TRACED; -+ } -+ signal_wake_up_state(t, state); - } - - void task_join_group_stop(struct task_struct *task); ---- a/kernel/ptrace.c -+++ b/kernel/ptrace.c -@@ -185,7 +185,12 @@ static bool looks_like_a_spurious_pid(st - return true; - } - --/* Ensure that nothing can wake it up, even SIGKILL */ -+/* -+ * Ensure that nothing can wake it up, even SIGKILL -+ * -+ * A task is switched to this state while a ptrace operation is in progress; -+ * such that the ptrace operation is uninterruptible. -+ */ - static bool ptrace_freeze_traced(struct task_struct *task) - { - bool ret = false; -@@ -216,8 +221,10 @@ static void ptrace_unfreeze_traced(struc - */ - if (lock_task_sighand(task, &flags)) { - task->jobctl &= ~JOBCTL_PTRACE_FROZEN; -- if (__fatal_signal_pending(task)) -+ if (__fatal_signal_pending(task)) { -+ task->jobctl &= ~TASK_TRACED; - wake_up_state(task, __TASK_TRACED); -+ } - unlock_task_sighand(task, &flags); - } - } -@@ -462,8 +469,10 @@ static int ptrace_attach(struct task_str - * in and out of STOPPED are protected by siglock. 
- */ - if (task_is_stopped(task) && -- task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) -+ task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) { -+ task->jobctl &= ~JOBCTL_STOPPED; - signal_wake_up_state(task, __TASK_STOPPED); -+ } - - spin_unlock(&task->sighand->siglock); - -@@ -875,6 +884,7 @@ static int ptrace_resume(struct task_str - */ - spin_lock_irq(&child->sighand->siglock); - child->exit_code = data; -+ child->jobctl &= ~JOBCTL_TRACED; - wake_up_state(child, __TASK_TRACED); - spin_unlock_irq(&child->sighand->siglock); - ---- a/kernel/signal.c -+++ b/kernel/signal.c -@@ -762,7 +762,10 @@ static int dequeue_synchronous_signal(ke - */ - void signal_wake_up_state(struct task_struct *t, unsigned int state) - { -+ lockdep_assert_held(&t->sighand->siglock); -+ - set_tsk_thread_flag(t, TIF_SIGPENDING); -+ - /* - * TASK_WAKEKILL also means wake it up in the stopped/traced/killable - * case. We don't check t->state here because there is a race with it -@@ -930,9 +933,10 @@ static bool prepare_signal(int sig, stru - for_each_thread(p, t) { - flush_sigqueue_mask(&flush, &t->pending); - task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING); -- if (likely(!(t->ptrace & PT_SEIZED))) -+ if (likely(!(t->ptrace & PT_SEIZED))) { -+ t->jobctl &= ~JOBCTL_STOPPED; - wake_up_state(t, __TASK_STOPPED); -- else -+ } else - ptrace_trap_notify(t); - } - -@@ -2218,6 +2222,7 @@ static int ptrace_stop(int exit_code, in - return exit_code; - - set_special_state(TASK_TRACED); -+ current->jobctl |= JOBCTL_TRACED; - - /* - * We're committing to trapping. 
TRACED should be visible before -@@ -2436,6 +2441,7 @@ static bool do_signal_stop(int signr) - if (task_participate_group_stop(current)) - notify = CLD_STOPPED; - -+ current->jobctl |= JOBCTL_STOPPED; - set_special_state(TASK_STOPPED); - spin_unlock_irq(¤t->sighand->siglock); - diff --git a/debian/patches-rt/0013-printk-add-kthread-console-printers.patch b/debian/patches-rt/0013-printk-add-kthread-console-printers.patch deleted file mode 100644 index 0333524d2..000000000 --- a/debian/patches-rt/0013-printk-add-kthread-console-printers.patch +++ /dev/null @@ -1,531 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 20 Apr 2022 01:52:35 +0206 -Subject: [PATCH 13/18] printk: add kthread console printers -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Create a kthread for each console to perform console printing. During -normal operation (@system_state == SYSTEM_RUNNING), the kthread -printers are responsible for all printing on their respective -consoles. - -During non-normal operation, console printing is done as it has been: -within the context of the printk caller or within irqwork triggered -by the printk caller, referred to as direct printing. - -Since threaded console printers are responsible for all printing -during normal operation, this also includes messages generated via -deferred printk calls. If direct printing is in effect during a -deferred printk call, the queued irqwork will perform the direct -printing. To make it clear that this is the only time that the -irqwork will perform direct printing, rename the flag -PRINTK_PENDING_OUTPUT to PRINTK_PENDING_DIRECT_OUTPUT. - -Threaded console printers synchronize against each other and against -console lockers by taking the console lock for each message that is -printed. - -Note that the kthread printers do not care about direct printing. -They will always try to print if new records are available. 
They can -be blocked by direct printing, but will be woken again once direct -printing is finished. - -Console unregistration is a bit tricky because the associated -kthread printer cannot be stopped while the console lock is held. -A policy is implemented that states: whichever task clears -con->thread (under the console lock) is responsible for stopping -the kthread. unregister_console() will clear con->thread while -the console lock is held and then stop the kthread after releasing -the console lock. - -For consoles that have implemented the exit() callback, the kthread -is stopped before exit() is called. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220419234637.357112-14-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/console.h | 2 - kernel/printk/printk.c | 303 ++++++++++++++++++++++++++++++++++++++++++++---- - 2 files changed, 283 insertions(+), 22 deletions(-) - ---- a/include/linux/console.h -+++ b/include/linux/console.h -@@ -153,6 +153,8 @@ struct console { - uint ospeed; - u64 seq; - unsigned long dropped; -+ struct task_struct *thread; -+ - void *data; - struct console *next; - }; ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -361,6 +361,13 @@ static int console_msg_format = MSG_FORM - /* syslog_lock protects syslog_* variables and write access to clear_seq. */ - static DEFINE_MUTEX(syslog_lock); - -+/* -+ * A flag to signify if printk_activate_kthreads() has already started the -+ * kthread printers. If true, any later registered consoles must start their -+ * own kthread directly. The flag is write protected by the console_lock. 
-+ */ -+static bool printk_kthreads_available; -+ - #ifdef CONFIG_PRINTK - static atomic_t printk_prefer_direct = ATOMIC_INIT(0); - -@@ -390,6 +397,14 @@ void printk_prefer_direct_exit(void) - WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0); - } - -+static inline bool allow_direct_printing(void) -+{ -+ return (!printk_kthreads_available || -+ system_state > SYSTEM_RUNNING || -+ oops_in_progress || -+ atomic_read(&printk_prefer_direct)); -+} -+ - DECLARE_WAIT_QUEUE_HEAD(log_wait); - /* All 3 protected by @syslog_lock. */ - /* the next printk record to read by syslog(READ) or /proc/kmsg */ -@@ -2280,10 +2295,10 @@ asmlinkage int vprintk_emit(int facility - printed_len = vprintk_store(facility, level, dev_info, fmt, args); - - /* If called from the scheduler, we can not call up(). */ -- if (!in_sched) { -+ if (!in_sched && allow_direct_printing()) { - /* - * The caller may be holding system-critical or -- * timing-sensitive locks. Disable preemption during -+ * timing-sensitive locks. Disable preemption during direct - * printing of all remaining records to all consoles so that - * this context can return as soon as possible. Hopefully - * another printk() caller will take over the printing. 
-@@ -2326,6 +2341,8 @@ EXPORT_SYMBOL(_printk); - - static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); - -+static void printk_start_kthread(struct console *con); -+ - #else /* CONFIG_PRINTK */ - - #define CONSOLE_LOG_MAX 0 -@@ -2359,6 +2376,8 @@ static void call_console_driver(struct c - } - static bool suppress_message_printing(int level) { return false; } - static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } -+static void printk_start_kthread(struct console *con) { } -+static bool allow_direct_printing(void) { return true; } - - #endif /* CONFIG_PRINTK */ - -@@ -2559,6 +2578,13 @@ void resume_console(void) - down_console_sem(); - console_suspended = 0; - console_unlock(); -+ -+ /* -+ * While suspended, new records may have been added to the -+ * ringbuffer. Wake up the kthread printers to print them. -+ */ -+ wake_up_klogd(); -+ - pr_flush(1000, true); - } - -@@ -2577,6 +2603,9 @@ static int console_cpu_notify(unsigned i - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); -+ -+ /* Wake kthread printers. Some may have become usable. */ -+ wake_up_klogd(); - } - return 0; - } -@@ -2648,18 +2677,9 @@ static bool abandon_console_lock_in_pani - return atomic_read(&panic_cpu) != raw_smp_processor_id(); - } - --/* -- * Check if the given console is currently capable and allowed to print -- * records. -- * -- * Requires the console_lock. -- */ --static inline bool console_is_usable(struct console *con) -+static inline bool __console_is_usable(short flags) - { -- if (!(con->flags & CON_ENABLED)) -- return false; -- -- if (!con->write) -+ if (!(flags & CON_ENABLED)) - return false; - - /* -@@ -2668,12 +2688,26 @@ static inline bool console_is_usable(str - * cope (CON_ANYTIME) don't call them until this CPU is officially up. 
- */ - if (!cpu_online(raw_smp_processor_id()) && -- !(con->flags & CON_ANYTIME)) -+ !(flags & CON_ANYTIME)) - return false; - - return true; - } - -+/* -+ * Check if the given console is currently capable and allowed to print -+ * records. -+ * -+ * Requires the console_lock. -+ */ -+static inline bool console_is_usable(struct console *con) -+{ -+ if (!con->write) -+ return false; -+ -+ return __console_is_usable(con->flags); -+} -+ - static void __console_unlock(void) - { - console_locked = 0; -@@ -2786,8 +2820,8 @@ static bool console_emit_next_record(str - * were flushed to all usable consoles. A returned false informs the caller - * that everything was not flushed (either there were no usable consoles or - * another context has taken over printing or it is a panic situation and this -- * is not the panic CPU). Regardless the reason, the caller should assume it -- * is not useful to immediately try again. -+ * is not the panic CPU or direct printing is not preferred). Regardless the -+ * reason, the caller should assume it is not useful to immediately try again. - * - * Requires the console_lock. - */ -@@ -2804,6 +2838,10 @@ static bool console_flush_all(bool do_co - *handover = false; - - do { -+ /* Let the kthread printers do the work if they can. */ -+ if (!allow_direct_printing()) -+ return false; -+ - any_progress = false; - - for_each_console(con) { -@@ -3018,6 +3056,10 @@ void console_start(struct console *conso - console_lock(); - console->flags |= CON_ENABLED; - console_unlock(); -+ -+ /* Wake the newly enabled kthread printer. */ -+ wake_up_klogd(); -+ - __pr_flush(console, 1000, true); - } - EXPORT_SYMBOL(console_start); -@@ -3218,6 +3260,8 @@ void register_console(struct console *ne - nr_ext_console_drivers++; - - newcon->dropped = 0; -+ newcon->thread = NULL; -+ - if (newcon->flags & CON_PRINTBUFFER) { - /* Get a consistent copy of @syslog_seq. 
*/ - mutex_lock(&syslog_lock); -@@ -3227,6 +3271,10 @@ void register_console(struct console *ne - /* Begin with next message. */ - newcon->seq = prb_next_seq(prb); - } -+ -+ if (printk_kthreads_available) -+ printk_start_kthread(newcon); -+ - console_unlock(); - console_sysfs_notify(); - -@@ -3253,6 +3301,7 @@ EXPORT_SYMBOL(register_console); - - int unregister_console(struct console *console) - { -+ struct task_struct *thd; - struct console *con; - int res; - -@@ -3293,7 +3342,20 @@ int unregister_console(struct console *c - console_drivers->flags |= CON_CONSDEV; - - console->flags &= ~CON_ENABLED; -+ -+ /* -+ * console->thread can only be cleared under the console lock. But -+ * stopping the thread must be done without the console lock. The -+ * task that clears @thread is the task that stops the kthread. -+ */ -+ thd = console->thread; -+ console->thread = NULL; -+ - console_unlock(); -+ -+ if (thd) -+ kthread_stop(thd); -+ - console_sysfs_notify(); - - if (console->exit) -@@ -3389,6 +3451,20 @@ static int __init printk_late_init(void) - } - late_initcall(printk_late_init); - -+static int __init printk_activate_kthreads(void) -+{ -+ struct console *con; -+ -+ console_lock(); -+ printk_kthreads_available = true; -+ for_each_console(con) -+ printk_start_kthread(con); -+ console_unlock(); -+ -+ return 0; -+} -+early_initcall(printk_activate_kthreads); -+ - #if defined CONFIG_PRINTK - /* If @con is specified, only wait for that console. Otherwise wait for all. */ - static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) -@@ -3463,11 +3539,180 @@ bool pr_flush(int timeout_ms, bool reset - } - EXPORT_SYMBOL(pr_flush); - -+static void __printk_fallback_preferred_direct(void) -+{ -+ printk_prefer_direct_enter(); -+ pr_err("falling back to preferred direct printing\n"); -+ printk_kthreads_available = false; -+} -+ -+/* -+ * Enter preferred direct printing, but never exit. Mark console threads as -+ * unavailable. 
The system is then forever in preferred direct printing and -+ * any printing threads will exit. -+ * -+ * Must *not* be called under console_lock. Use -+ * __printk_fallback_preferred_direct() if already holding console_lock. -+ */ -+static void printk_fallback_preferred_direct(void) -+{ -+ console_lock(); -+ __printk_fallback_preferred_direct(); -+ console_unlock(); -+} -+ -+static bool printer_should_wake(struct console *con, u64 seq) -+{ -+ short flags; -+ -+ if (kthread_should_stop() || !printk_kthreads_available) -+ return true; -+ -+ if (console_suspended) -+ return false; -+ -+ /* -+ * This is an unsafe read from con->flags, but a false positive is -+ * not a problem. Worst case it would allow the printer to wake up -+ * although it is disabled. But the printer will notice that when -+ * attempting to print and instead go back to sleep. -+ */ -+ flags = data_race(READ_ONCE(con->flags)); -+ -+ if (!__console_is_usable(flags)) -+ return false; -+ -+ return prb_read_valid(prb, seq, NULL); -+} -+ -+static int printk_kthread_func(void *data) -+{ -+ struct console *con = data; -+ char *dropped_text = NULL; -+ char *ext_text = NULL; -+ bool handover; -+ u64 seq = 0; -+ char *text; -+ int error; -+ -+ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); -+ if (!text) { -+ con_printk(KERN_ERR, con, "failed to allocate text buffer\n"); -+ printk_fallback_preferred_direct(); -+ goto out; -+ } -+ -+ if (con->flags & CON_EXTENDED) { -+ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); -+ if (!ext_text) { -+ con_printk(KERN_ERR, con, "failed to allocate ext_text buffer\n"); -+ printk_fallback_preferred_direct(); -+ goto out; -+ } -+ } else { -+ dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL); -+ if (!dropped_text) { -+ con_printk(KERN_ERR, con, "failed to allocate dropped_text buffer\n"); -+ printk_fallback_preferred_direct(); -+ goto out; -+ } -+ } -+ -+ con_printk(KERN_INFO, con, "printing thread started\n"); -+ -+ for (;;) { -+ /* -+ * Guarantee this task is visible 
on the waitqueue before -+ * checking the wake condition. -+ * -+ * The full memory barrier within set_current_state() of -+ * prepare_to_wait_event() pairs with the full memory barrier -+ * within wq_has_sleeper(). -+ * -+ * This pairs with __wake_up_klogd:A. -+ */ -+ error = wait_event_interruptible(log_wait, -+ printer_should_wake(con, seq)); /* LMM(printk_kthread_func:A) */ -+ -+ if (kthread_should_stop() || !printk_kthreads_available) -+ break; -+ -+ if (error) -+ continue; -+ -+ console_lock(); -+ -+ if (console_suspended) { -+ up_console_sem(); -+ continue; -+ } -+ -+ if (!console_is_usable(con)) { -+ __console_unlock(); -+ continue; -+ } -+ -+ /* -+ * Even though the printk kthread is always preemptible, it is -+ * still not allowed to call cond_resched() from within -+ * console drivers. The task may become non-preemptible in the -+ * console driver call chain. For example, vt_console_print() -+ * takes a spinlock and then can call into fbcon_redraw(), -+ * which can conditionally invoke cond_resched(). -+ */ -+ console_may_schedule = 0; -+ console_emit_next_record(con, text, ext_text, dropped_text, &handover); -+ if (handover) -+ continue; -+ -+ seq = con->seq; -+ -+ __console_unlock(); -+ } -+ -+ con_printk(KERN_INFO, con, "printing thread stopped\n"); -+out: -+ kfree(dropped_text); -+ kfree(ext_text); -+ kfree(text); -+ -+ console_lock(); -+ /* -+ * If this kthread is being stopped by another task, con->thread will -+ * already be NULL. That is fine. The important thing is that it is -+ * NULL after the kthread exits. -+ */ -+ con->thread = NULL; -+ console_unlock(); -+ -+ return 0; -+} -+ -+/* Must be called under console_lock. */ -+static void printk_start_kthread(struct console *con) -+{ -+ /* -+ * Do not start a kthread if there is no write() callback. The -+ * kthreads assume the write() callback exists. 
-+ */ -+ if (!con->write) -+ return; -+ -+ con->thread = kthread_run(printk_kthread_func, con, -+ "pr/%s%d", con->name, con->index); -+ if (IS_ERR(con->thread)) { -+ con->thread = NULL; -+ con_printk(KERN_ERR, con, "unable to start printing thread\n"); -+ __printk_fallback_preferred_direct(); -+ return; -+ } -+} -+ - /* - * Delayed printk version, for scheduler-internal messages: - */ --#define PRINTK_PENDING_WAKEUP 0x01 --#define PRINTK_PENDING_OUTPUT 0x02 -+#define PRINTK_PENDING_WAKEUP 0x01 -+#define PRINTK_PENDING_DIRECT_OUTPUT 0x02 - - static DEFINE_PER_CPU(int, printk_pending); - -@@ -3475,10 +3720,14 @@ static void wake_up_klogd_work_func(stru - { - int pending = this_cpu_xchg(printk_pending, 0); - -- if (pending & PRINTK_PENDING_OUTPUT) { -+ if (pending & PRINTK_PENDING_DIRECT_OUTPUT) { -+ printk_prefer_direct_enter(); -+ - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); -+ -+ printk_prefer_direct_exit(); - } - - if (pending & PRINTK_PENDING_WAKEUP) -@@ -3503,10 +3752,11 @@ static void __wake_up_klogd(int val) - * prepare_to_wait_event(), which is called after ___wait_event() adds - * the waiter but before it has checked the wait condition. - * -- * This pairs with devkmsg_read:A and syslog_print:A. -+ * This pairs with devkmsg_read:A, syslog_print:A, and -+ * printk_kthread_func:A. - */ - if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */ -- (val & PRINTK_PENDING_OUTPUT)) { -+ (val & PRINTK_PENDING_DIRECT_OUTPUT)) { - this_cpu_or(printk_pending, val); - irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); - } -@@ -3524,7 +3774,16 @@ void defer_console_output(void) - * New messages may have been added directly to the ringbuffer - * using vprintk_store(), so wake any waiters as well. 
- */ -- __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); -+ int val = PRINTK_PENDING_WAKEUP; -+ -+ /* -+ * If console deferring was called with preferred direct printing, -+ * make the irqwork perform the direct printing. -+ */ -+ if (atomic_read(&printk_prefer_direct)) -+ val |= PRINTK_PENDING_DIRECT_OUTPUT; -+ -+ __wake_up_klogd(val); - } - - void printk_trigger_flush(void) diff --git a/debian/patches-rt/0014-printk-extend-console_lock-for-per-console-locking.patch b/debian/patches-rt/0014-printk-extend-console_lock-for-per-console-locking.patch deleted file mode 100644 index 460480472..000000000 --- a/debian/patches-rt/0014-printk-extend-console_lock-for-per-console-locking.patch +++ /dev/null @@ -1,585 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Mon, 25 Apr 2022 23:04:28 +0206 -Subject: [PATCH 14/18] printk: extend console_lock for per-console locking -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Currently threaded console printers synchronize against each -other using console_lock(). However, different console drivers -are unrelated and do not require any synchronization between -each other. Removing the synchronization between the threaded -console printers will allow each console to print at its own -speed. - -But the threaded consoles printers do still need to synchronize -against console_lock() callers. Introduce a per-console mutex -and a new console boolean field @blocked to provide this -synchronization. - -console_lock() is modified so that it must acquire the mutex -of each console in order to set the @blocked field. Console -printing threads will acquire their mutex while printing a -record. If @blocked was set, the thread will go back to sleep -instead of printing. 
- -The reason for the @blocked boolean field is so that -console_lock() callers do not need to acquire multiple console -mutexes simultaneously, which would introduce unnecessary -complexity due to nested mutex locking. Also, a new field -was chosen instead of adding a new @flags value so that the -blocked status could be checked without concern of reading -inconsistent values due to @flags updates from other contexts. - -Threaded console printers also need to synchronize against -console_trylock() callers. Since console_trylock() may be -called from any context, the per-console mutex cannot be used -for this synchronization. (mutex_trylock() cannot be called -from atomic contexts.) Introduce a global atomic counter to -identify if any threaded printers are active. The threaded -printers will also check the atomic counter to identify if the -console has been locked by another task via console_trylock(). - -Note that @console_sem is still used to provide synchronization -between console_lock() and console_trylock() callers. - -A locking overview for console_lock(), console_trylock(), and the -threaded printers is as follows (pseudo code): - -console_lock() -{ - down(&console_sem); - for_each_console(con) { - mutex_lock(&con->lock); - con->blocked = true; - mutex_unlock(&con->lock); - } - /* console_lock acquired */ -} - -console_trylock() -{ - if (down_trylock(&console_sem) == 0) { - if (atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0) { - /* console_lock acquired */ - } - } -} - -threaded_printer() -{ - mutex_lock(&con->lock); - if (!con->blocked) { - /* console_lock() callers blocked */ - - if (atomic_inc_unless_negative(&console_kthreads_active)) { - /* console_trylock() callers blocked */ - - con->write(); - - atomic_dec(&console_lock_count); - } - } - mutex_unlock(&con->lock); -} - -The console owner and waiter logic now only applies between contexts -that have taken the console_lock via console_trylock(). 
Threaded -printers never take the console_lock, so they do not have a -console_lock to handover. Tasks that have used console_lock() will -block the threaded printers using a mutex and if the console_lock -is handed over to an atomic context, it would be unable to unblock -the threaded printers. However, the console_trylock() case is -really the only scenario that is interesting for handovers anyway. - -@panic_console_dropped must change to atomic_t since it is no longer -protected exclusively by the console_lock. - -Since threaded printers remain asleep if they see that the console -is locked, they now must be explicitly woken in __console_unlock(). -This means wake_up_klogd() calls following a console_unlock() are -no longer necessary and are removed. - -Also note that threaded printers no longer need to check -@console_suspended. The check for the @blocked field implicitly -covers the suspended console case. - -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/linux/console.h | 15 ++ - kernel/printk/printk.c | 261 +++++++++++++++++++++++++++++++++++++----------- - 2 files changed, 220 insertions(+), 56 deletions(-) - ---- a/include/linux/console.h -+++ b/include/linux/console.h -@@ -16,6 +16,7 @@ - - #include <linux/atomic.h> - #include <linux/types.h> -+#include <linux/mutex.h> - - struct vc_data; - struct console_font_op; -@@ -154,6 +155,20 @@ struct console { - u64 seq; - unsigned long dropped; - struct task_struct *thread; -+ bool blocked; -+ -+ /* -+ * The per-console lock is used by printing kthreads to synchronize -+ * this console with callers of console_lock(). This is necessary in -+ * order to allow printing kthreads to run in parallel to each other, -+ * while each safely accessing the @blocked field and synchronizing -+ * against direct printing via console_lock/console_unlock. 
-+ * -+ * Note: For synchronizing against direct printing via -+ * console_trylock/console_unlock, see the static global -+ * variable @console_kthreads_active. -+ */ -+ struct mutex lock; - - void *data; - struct console *next; ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -224,6 +224,33 @@ int devkmsg_sysctl_set_loglvl(struct ctl - static int nr_ext_console_drivers; - - /* -+ * Used to synchronize printing kthreads against direct printing via -+ * console_trylock/console_unlock. -+ * -+ * Values: -+ * -1 = console kthreads atomically blocked (via global trylock) -+ * 0 = no kthread printing, console not locked (via trylock) -+ * >0 = kthread(s) actively printing -+ * -+ * Note: For synchronizing against direct printing via -+ * console_lock/console_unlock, see the @lock variable in -+ * struct console. -+ */ -+static atomic_t console_kthreads_active = ATOMIC_INIT(0); -+ -+#define console_kthreads_atomic_tryblock() \ -+ (atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0) -+#define console_kthreads_atomic_unblock() \ -+ atomic_cmpxchg(&console_kthreads_active, -1, 0) -+#define console_kthreads_atomically_blocked() \ -+ (atomic_read(&console_kthreads_active) == -1) -+ -+#define console_kthread_printing_tryenter() \ -+ atomic_inc_unless_negative(&console_kthreads_active) -+#define console_kthread_printing_exit() \ -+ atomic_dec(&console_kthreads_active) -+ -+/* - * Helper macros to handle lockdep when locking/unlocking console_sem. We use - * macros instead of functions so that _RET_IP_ contains useful information. - */ -@@ -271,6 +298,49 @@ static bool panic_in_progress(void) - } - - /* -+ * Tracks whether kthread printers are all blocked. A value of true implies -+ * that the console is locked via console_lock() or the console is suspended. -+ * Writing to this variable requires holding @console_sem. -+ */ -+static bool console_kthreads_blocked; -+ -+/* -+ * Block all kthread printers from a schedulable context. 
-+ * -+ * Requires holding @console_sem. -+ */ -+static void console_kthreads_block(void) -+{ -+ struct console *con; -+ -+ for_each_console(con) { -+ mutex_lock(&con->lock); -+ con->blocked = true; -+ mutex_unlock(&con->lock); -+ } -+ -+ console_kthreads_blocked = true; -+} -+ -+/* -+ * Unblock all kthread printers from a schedulable context. -+ * -+ * Requires holding @console_sem. -+ */ -+static void console_kthreads_unblock(void) -+{ -+ struct console *con; -+ -+ for_each_console(con) { -+ mutex_lock(&con->lock); -+ con->blocked = false; -+ mutex_unlock(&con->lock); -+ } -+ -+ console_kthreads_blocked = false; -+} -+ -+/* - * This is used for debugging the mess that is the VT code by - * keeping track if we have the console semaphore held. It's - * definitely not the perfect debug tool (we don't know if _WE_ -@@ -2578,13 +2648,6 @@ void resume_console(void) - down_console_sem(); - console_suspended = 0; - console_unlock(); -- -- /* -- * While suspended, new records may have been added to the -- * ringbuffer. Wake up the kthread printers to print them. -- */ -- wake_up_klogd(); -- - pr_flush(1000, true); - } - -@@ -2603,9 +2666,14 @@ static int console_cpu_notify(unsigned i - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); -- -- /* Wake kthread printers. Some may have become usable. */ -- wake_up_klogd(); -+ else { -+ /* -+ * If a new CPU comes online, the conditions for -+ * printer_should_wake() may have changed for some -+ * kthread printer with !CON_ANYTIME. 
-+ */ -+ wake_up_klogd(); -+ } - } - return 0; - } -@@ -2625,6 +2693,7 @@ void console_lock(void) - down_console_sem(); - if (console_suspended) - return; -+ console_kthreads_block(); - console_locked = 1; - console_may_schedule = 1; - } -@@ -2646,6 +2715,10 @@ int console_trylock(void) - up_console_sem(); - return 0; - } -+ if (!console_kthreads_atomic_tryblock()) { -+ up_console_sem(); -+ return 0; -+ } - console_locked = 1; - console_may_schedule = 0; - return 1; -@@ -2654,7 +2727,7 @@ EXPORT_SYMBOL(console_trylock); - - int is_console_locked(void) - { -- return console_locked; -+ return (console_locked || atomic_read(&console_kthreads_active)); - } - EXPORT_SYMBOL(is_console_locked); - -@@ -2698,7 +2771,7 @@ static inline bool __console_is_usable(s - * Check if the given console is currently capable and allowed to print - * records. - * -- * Requires the console_lock. -+ * Requires holding the console_lock. - */ - static inline bool console_is_usable(struct console *con) - { -@@ -2711,6 +2784,22 @@ static inline bool console_is_usable(str - static void __console_unlock(void) - { - console_locked = 0; -+ -+ /* -+ * Depending on whether console_lock() or console_trylock() was used, -+ * appropriately allow the kthread printers to continue. -+ */ -+ if (console_kthreads_blocked) -+ console_kthreads_unblock(); -+ else -+ console_kthreads_atomic_unblock(); -+ -+ /* -+ * New records may have arrived while the console was locked. -+ * Wake the kthread printers to print them. -+ */ -+ wake_up_klogd(); -+ - up_console_sem(); - } - -@@ -2728,17 +2817,19 @@ static void __console_unlock(void) - * - * @handover will be set to true if a printk waiter has taken over the - * console_lock, in which case the caller is no longer holding the -- * console_lock. Otherwise it is set to false. -+ * console_lock. Otherwise it is set to false. A NULL pointer may be provided -+ * to disable allowing the console_lock to be taken over by a printk waiter. 
- * - * Returns false if the given console has no next record to print, otherwise - * true. - * -- * Requires the console_lock. -+ * Requires the console_lock if @handover is non-NULL. -+ * Requires con->lock otherwise. - */ --static bool console_emit_next_record(struct console *con, char *text, char *ext_text, -- char *dropped_text, bool *handover) -+static bool __console_emit_next_record(struct console *con, char *text, char *ext_text, -+ char *dropped_text, bool *handover) - { -- static int panic_console_dropped; -+ static atomic_t panic_console_dropped = ATOMIC_INIT(0); - struct printk_info info; - struct printk_record r; - unsigned long flags; -@@ -2747,7 +2838,8 @@ static bool console_emit_next_record(str - - prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); - -- *handover = false; -+ if (handover) -+ *handover = false; - - if (!prb_read_valid(prb, con->seq, &r)) - return false; -@@ -2755,7 +2847,8 @@ static bool console_emit_next_record(str - if (con->seq != r.info->seq) { - con->dropped += r.info->seq - con->seq; - con->seq = r.info->seq; -- if (panic_in_progress() && panic_console_dropped++ > 10) { -+ if (panic_in_progress() && -+ atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) { - suppress_panic_printk = 1; - pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); - } -@@ -2777,32 +2870,62 @@ static bool console_emit_next_record(str - len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); - } - -- /* -- * While actively printing out messages, if another printk() -- * were to occur on another CPU, it may wait for this one to -- * finish. This task can not be preempted if there is a -- * waiter waiting to take over. -- * -- * Interrupts are disabled because the hand over to a waiter -- * must not be interrupted until the hand over is completed -- * (@console_waiter is cleared). 
-- */ -- printk_safe_enter_irqsave(flags); -- console_lock_spinning_enable(); -+ if (handover) { -+ /* -+ * While actively printing out messages, if another printk() -+ * were to occur on another CPU, it may wait for this one to -+ * finish. This task can not be preempted if there is a -+ * waiter waiting to take over. -+ * -+ * Interrupts are disabled because the hand over to a waiter -+ * must not be interrupted until the hand over is completed -+ * (@console_waiter is cleared). -+ */ -+ printk_safe_enter_irqsave(flags); -+ console_lock_spinning_enable(); -+ -+ /* don't trace irqsoff print latency */ -+ stop_critical_timings(); -+ } - -- stop_critical_timings(); /* don't trace print latency */ - call_console_driver(con, write_text, len, dropped_text); -- start_critical_timings(); - - con->seq++; - -- *handover = console_lock_spinning_disable_and_check(); -- printk_safe_exit_irqrestore(flags); -+ if (handover) { -+ start_critical_timings(); -+ *handover = console_lock_spinning_disable_and_check(); -+ printk_safe_exit_irqrestore(flags); -+ } - skip: - return true; - } - - /* -+ * Print a record for a given console, but allow another printk() caller to -+ * take over the console_lock and continue printing. -+ * -+ * Requires the console_lock, but depending on @handover after the call, the -+ * caller may no longer have the console_lock. -+ * -+ * See __console_emit_next_record() for argument and return details. -+ */ -+static bool console_emit_next_record_transferable(struct console *con, char *text, char *ext_text, -+ char *dropped_text, bool *handover) -+{ -+ /* -+ * Handovers are only supported if threaded printers are atomically -+ * blocked. The context taking over the console_lock may be atomic. -+ */ -+ if (!console_kthreads_atomically_blocked()) { -+ *handover = false; -+ handover = NULL; -+ } -+ -+ return __console_emit_next_record(con, text, ext_text, dropped_text, handover); -+} -+ -+/* - * Print out all remaining records to all consoles. 
- * - * @do_cond_resched is set by the caller. It can be true only in schedulable -@@ -2853,13 +2976,11 @@ static bool console_flush_all(bool do_co - - if (con->flags & CON_EXTENDED) { - /* Extended consoles do not print "dropped messages". */ -- progress = console_emit_next_record(con, &text[0], -- &ext_text[0], NULL, -- handover); -+ progress = console_emit_next_record_transferable(con, &text[0], -+ &ext_text[0], NULL, handover); - } else { -- progress = console_emit_next_record(con, &text[0], -- NULL, &dropped_text[0], -- handover); -+ progress = console_emit_next_record_transferable(con, &text[0], -+ NULL, &dropped_text[0], handover); - } - if (*handover) - return false; -@@ -2974,6 +3095,10 @@ void console_unblank(void) - if (oops_in_progress) { - if (down_trylock_console_sem() != 0) - return; -+ if (!console_kthreads_atomic_tryblock()) { -+ up_console_sem(); -+ return; -+ } - } else - console_lock(); - -@@ -3056,10 +3181,6 @@ void console_start(struct console *conso - console_lock(); - console->flags |= CON_ENABLED; - console_unlock(); -- -- /* Wake the newly enabled kthread printer. */ -- wake_up_klogd(); -- - __pr_flush(console, 1000, true); - } - EXPORT_SYMBOL(console_start); -@@ -3261,6 +3382,8 @@ void register_console(struct console *ne - - newcon->dropped = 0; - newcon->thread = NULL; -+ newcon->blocked = true; -+ mutex_init(&newcon->lock); - - if (newcon->flags & CON_PRINTBUFFER) { - /* Get a consistent copy of @syslog_seq. */ -@@ -3561,6 +3684,19 @@ static void printk_fallback_preferred_di - console_unlock(); - } - -+/* -+ * Print a record for a given console, not allowing another printk() caller -+ * to take over. This is appropriate for contexts that do not have the -+ * console_lock. -+ * -+ * See __console_emit_next_record() for argument and return details. 
-+ */ -+static bool console_emit_next_record(struct console *con, char *text, char *ext_text, -+ char *dropped_text) -+{ -+ return __console_emit_next_record(con, text, ext_text, dropped_text, NULL); -+} -+ - static bool printer_should_wake(struct console *con, u64 seq) - { - short flags; -@@ -3568,8 +3704,10 @@ static bool printer_should_wake(struct c - if (kthread_should_stop() || !printk_kthreads_available) - return true; - -- if (console_suspended) -+ if (con->blocked || -+ console_kthreads_atomically_blocked()) { - return false; -+ } - - /* - * This is an unsafe read from con->flags, but a false positive is -@@ -3590,7 +3728,6 @@ static int printk_kthread_func(void *dat - struct console *con = data; - char *dropped_text = NULL; - char *ext_text = NULL; -- bool handover; - u64 seq = 0; - char *text; - int error; -@@ -3640,15 +3777,27 @@ static int printk_kthread_func(void *dat - if (error) - continue; - -- console_lock(); -+ error = mutex_lock_interruptible(&con->lock); -+ if (error) -+ continue; - -- if (console_suspended) { -- up_console_sem(); -+ if (con->blocked || -+ !console_kthread_printing_tryenter()) { -+ /* Another context has locked the console_lock. */ -+ mutex_unlock(&con->lock); - continue; - } - -- if (!console_is_usable(con)) { -- __console_unlock(); -+ /* -+ * Although this context has not locked the console_lock, it -+ * is known that the console_lock is not locked and it is not -+ * possible for any other context to lock the console_lock. -+ * Therefore it is safe to read con->flags. -+ */ -+ -+ if (!__console_is_usable(con->flags)) { -+ console_kthread_printing_exit(); -+ mutex_unlock(&con->lock); - continue; - } - -@@ -3661,13 +3810,13 @@ static int printk_kthread_func(void *dat - * which can conditionally invoke cond_resched(). 
- */ - console_may_schedule = 0; -- console_emit_next_record(con, text, ext_text, dropped_text, &handover); -- if (handover) -- continue; -+ console_emit_next_record(con, text, ext_text, dropped_text); - - seq = con->seq; - -- __console_unlock(); -+ console_kthread_printing_exit(); -+ -+ mutex_unlock(&con->lock); - } - - con_printk(KERN_INFO, con, "printing thread stopped\n"); diff --git a/debian/patches-rt/0015-printk-remove-console_locked.patch b/debian/patches-rt/0015-printk-remove-console_locked.patch deleted file mode 100644 index 23d0dbd64..000000000 --- a/debian/patches-rt/0015-printk-remove-console_locked.patch +++ /dev/null @@ -1,97 +0,0 @@ -From: John Ogness <john.ogness@linutronix.de> -Date: Wed, 20 Apr 2022 01:52:37 +0206 -Subject: [PATCH 15/18] printk: remove @console_locked -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -The static global variable @console_locked is used to help debug -VT code to make sure that certain code paths are running with -the console_lock held. However, this information is also available -with the static global variable @console_kthreads_blocked (for -locking via console_lock()), and the static global variable -@console_kthreads_active (for locking via console_trylock()). - -Remove @console_locked and update is_console_locked() to use the -alternative variables. 
- -Signed-off-by: John Ogness <john.ogness@linutronix.de> -Reviewed-by: Petr Mladek <pmladek@suse.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/20220419234637.357112-16-john.ogness@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/printk/printk.c | 29 ++++++++++++++--------------- - 1 file changed, 14 insertions(+), 15 deletions(-) - ---- a/kernel/printk/printk.c -+++ b/kernel/printk/printk.c -@@ -340,15 +340,7 @@ static void console_kthreads_unblock(voi - console_kthreads_blocked = false; - } - --/* -- * This is used for debugging the mess that is the VT code by -- * keeping track if we have the console semaphore held. It's -- * definitely not the perfect debug tool (we don't know if _WE_ -- * hold it and are racing, but it helps tracking those weird code -- * paths in the console code where we end up in places I want -- * locked without the console semaphore held). -- */ --static int console_locked, console_suspended; -+static int console_suspended; - - /* - * Array of consoles built from command line options (console=) -@@ -2694,7 +2686,6 @@ void console_lock(void) - if (console_suspended) - return; - console_kthreads_block(); -- console_locked = 1; - console_may_schedule = 1; - } - EXPORT_SYMBOL(console_lock); -@@ -2719,15 +2710,26 @@ int console_trylock(void) - up_console_sem(); - return 0; - } -- console_locked = 1; - console_may_schedule = 0; - return 1; - } - EXPORT_SYMBOL(console_trylock); - -+/* -+ * This is used to help to make sure that certain paths within the VT code are -+ * running with the console lock held. It is definitely not the perfect debug -+ * tool (it is not known if the VT code is the task holding the console lock), -+ * but it helps tracking those weird code paths in the console code such as -+ * when the console is suspended: where the console is not locked but no -+ * console printing may occur. 
-+ * -+ * Note: This returns true when the console is suspended but is not locked. -+ * This is intentional because the VT code must consider that situation -+ * the same as if the console was locked. -+ */ - int is_console_locked(void) - { -- return (console_locked || atomic_read(&console_kthreads_active)); -+ return (console_kthreads_blocked || atomic_read(&console_kthreads_active)); - } - EXPORT_SYMBOL(is_console_locked); - -@@ -2783,8 +2785,6 @@ static inline bool console_is_usable(str - - static void __console_unlock(void) - { -- console_locked = 0; -- - /* - * Depending on whether console_lock() or console_trylock() was used, - * appropriately allow the kthread printers to continue. -@@ -3102,7 +3102,6 @@ void console_unblank(void) - } else - console_lock(); - -- console_locked = 1; - console_may_schedule = 0; - for_each_console(c) - if ((c->flags & CON_ENABLED) && c->unblank) diff --git a/debian/patches-rt/0016-printk-add-infrastucture-for-atomic-consoles.patch b/debian/patches-rt/0016-printk-add-infrastucture-for-atomic-consoles.patch index 580d99b96..1c5995684 100644 --- a/debian/patches-rt/0016-printk-add-infrastucture-for-atomic-consoles.patch +++ b/debian/patches-rt/0016-printk-add-infrastucture-for-atomic-consoles.patch @@ -1,7 +1,7 @@ From: John Ogness <john.ogness@linutronix.de> Date: Fri, 4 Feb 2022 16:01:17 +0106 Subject: [PATCH 16/18] printk: add infrastucture for atomic consoles -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz Many times it is not possible to see the console output on panic because printing threads cannot be scheduled and/or the @@ -76,7 +76,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> }; --- a/init/Kconfig +++ b/init/Kconfig -@@ -1545,6 +1545,10 @@ config PRINTK +@@ -1574,6 +1574,10 @@ config PRINTK very difficult to diagnose system problems, saying N here is 
strongly discouraged. @@ -89,7 +89,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> default y --- a/kernel/panic.c +++ b/kernel/panic.c -@@ -233,7 +233,6 @@ void panic(const char *fmt, ...) +@@ -257,7 +257,6 @@ void panic(const char *fmt, ...) panic_smp_self_stop(); console_verbose(); @@ -97,7 +97,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> va_start(args, fmt); len = vscnprintf(buf, sizeof(buf), fmt, args); va_end(args); -@@ -250,6 +249,11 @@ void panic(const char *fmt, ...) +@@ -274,6 +273,11 @@ void panic(const char *fmt, ...) dump_stack(); #endif @@ -119,7 +119,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> -@@ -2035,21 +2036,30 @@ static int console_trylock_spinning(void +@@ -2063,19 +2064,28 @@ static int console_trylock_spinning(void * dropped, a dropped message will be written out first. */ static void call_console_driver(struct console *con, const char *text, size_t len, @@ -129,8 +129,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> + unsigned long dropped = 0; size_t dropped_len; - trace_console_rcuidle(text, len); - - if (con->dropped && dropped_text) { + if (dropped_text) + dropped = atomic_long_xchg_relaxed(&con->dropped, 0); @@ -156,7 +154,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* -@@ -2401,6 +2411,76 @@ asmlinkage __visible int _printk(const c +@@ -2429,6 +2439,76 @@ asmlinkage __visible int _printk(const c } EXPORT_SYMBOL(_printk); @@ -233,7 +231,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); static void printk_start_kthread(struct console *con); -@@ -2415,6 +2495,8 @@ static void printk_start_kthread(struct +@@ -2443,6 +2523,8 @@ static void printk_start_kthread(struct #define prb_first_valid_seq(rb) 0 #define 
prb_next_seq(rb) 0 @@ -242,7 +240,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static u64 syslog_seq; static size_t record_print_text(const struct printk_record *r, -@@ -2433,7 +2515,7 @@ static ssize_t msg_print_ext_body(char * +@@ -2461,7 +2543,7 @@ static ssize_t msg_print_ext_body(char * static void console_lock_spinning_enable(void) { } static int console_lock_spinning_disable_and_check(void) { return 0; } static void call_console_driver(struct console *con, const char *text, size_t len, @@ -251,7 +249,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { } static bool suppress_message_printing(int level) { return false; } -@@ -2775,10 +2857,20 @@ static inline bool __console_is_usable(s +@@ -2803,10 +2885,20 @@ static inline bool __console_is_usable(s * * Requires holding the console_lock. */ @@ -274,7 +272,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return __console_is_usable(con->flags); } -@@ -2803,6 +2895,66 @@ static void __console_unlock(void) +@@ -2831,6 +2923,66 @@ static void __console_unlock(void) up_console_sem(); } @@ -341,7 +339,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Print one record for the given console. The record printed is whatever * record is the next available record for the given console. -@@ -2815,6 +2967,8 @@ static void __console_unlock(void) +@@ -2843,6 +2995,8 @@ static void __console_unlock(void) * If dropped messages should be printed, @dropped_text is a buffer of size * DROPPED_TEXT_MAX. Otherwise @dropped_text must be NULL. * @@ -350,7 +348,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> * @handover will be set to true if a printk waiter has taken over the * console_lock, in which case the caller is no longer holding the * console_lock. Otherwise it is set to false. 
A NULL pointer may be provided -@@ -2827,7 +2981,8 @@ static void __console_unlock(void) +@@ -2855,7 +3009,8 @@ static void __console_unlock(void) * Requires con->lock otherwise. */ static bool __console_emit_next_record(struct console *con, char *text, char *ext_text, @@ -360,7 +358,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { static atomic_t panic_console_dropped = ATOMIC_INIT(0); struct printk_info info; -@@ -2835,18 +2990,22 @@ static bool __console_emit_next_record(s +@@ -2863,18 +3018,22 @@ static bool __console_emit_next_record(s unsigned long flags; char *write_text; size_t len; @@ -387,7 +385,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (panic_in_progress() && atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) { suppress_panic_printk = 1; -@@ -2856,7 +3015,7 @@ static bool __console_emit_next_record(s +@@ -2884,7 +3043,7 @@ static bool __console_emit_next_record(s /* Skip record that has level above the console loglevel. 
*/ if (suppress_message_printing(r.info->level)) { @@ -396,7 +394,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> goto skip; } -@@ -2888,9 +3047,9 @@ static bool __console_emit_next_record(s +@@ -2916,9 +3075,9 @@ static bool __console_emit_next_record(s stop_critical_timings(); } @@ -408,7 +406,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (handover) { start_critical_timings(); -@@ -2922,7 +3081,7 @@ static bool console_emit_next_record_tra +@@ -2950,7 +3109,7 @@ static bool console_emit_next_record_tra handover = NULL; } @@ -417,7 +415,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } /* -@@ -2970,7 +3129,7 @@ static bool console_flush_all(bool do_co +@@ -2998,7 +3157,7 @@ static bool console_flush_all(bool do_co for_each_console(con) { bool progress; @@ -426,7 +424,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> continue; any_usable = true; -@@ -3005,6 +3164,68 @@ static bool console_flush_all(bool do_co +@@ -3033,6 +3192,68 @@ static bool console_flush_all(bool do_co return any_usable; } @@ -495,7 +493,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /** * console_unlock - unlock the console system * -@@ -3120,6 +3341,11 @@ void console_unblank(void) +@@ -3148,6 +3369,11 @@ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { @@ -507,7 +505,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * If someone else is holding the console lock, trylock will fail * and may_schedule may be set. 
Ignore and proceed to unlock so -@@ -3136,7 +3362,7 @@ void console_flush_on_panic(enum con_flu +@@ -3164,7 +3390,7 @@ void console_flush_on_panic(enum con_flu seq = prb_first_valid_seq(prb); for_each_console(c) @@ -516,7 +514,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } console_unlock(); } -@@ -3379,19 +3605,22 @@ void register_console(struct console *ne +@@ -3407,19 +3633,22 @@ void register_console(struct console *ne if (newcon->flags & CON_EXTENDED) nr_ext_console_drivers++; @@ -542,7 +540,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } if (printk_kthreads_available) -@@ -3480,6 +3709,10 @@ int unregister_console(struct console *c +@@ -3508,6 +3737,10 @@ int unregister_console(struct console *c console_sysfs_notify(); @@ -553,7 +551,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (console->exit) res = console->exit(console); -@@ -3609,7 +3842,7 @@ static bool __pr_flush(struct console *c +@@ -3637,7 +3870,7 @@ static bool __pr_flush(struct console *c for_each_console(c) { if (con && con != c) continue; @@ -562,7 +560,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> continue; printk_seq = c->seq; if (printk_seq < seq) -@@ -3691,9 +3924,10 @@ static void printk_fallback_preferred_di +@@ -3719,9 +3952,10 @@ static void printk_fallback_preferred_di * See __console_emit_next_record() for argument and return details. 
*/ static bool console_emit_next_record(struct console *con, char *text, char *ext_text, @@ -575,7 +573,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static bool printer_should_wake(struct console *con, u64 seq) -@@ -3731,6 +3965,11 @@ static int printk_kthread_func(void *dat +@@ -3762,6 +3996,11 @@ static int printk_kthread_func(void *dat char *text; int error; @@ -587,7 +585,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); if (!text) { con_printk(KERN_ERR, con, "failed to allocate text buffer\n"); -@@ -3809,7 +4048,7 @@ static int printk_kthread_func(void *dat +@@ -3840,7 +4079,7 @@ static int printk_kthread_func(void *dat * which can conditionally invoke cond_resched(). */ console_may_schedule = 0; diff --git a/debian/patches-rt/0017-serial-8250-implement-write_atomic.patch b/debian/patches-rt/0017-serial-8250-implement-write_atomic.patch index b2fd682c1..75c6dbc57 100644 --- a/debian/patches-rt/0017-serial-8250-implement-write_atomic.patch +++ b/debian/patches-rt/0017-serial-8250-implement-write_atomic.patch @@ -1,7 +1,7 @@ From: John Ogness <john.ogness@linutronix.de> Date: Fri, 4 Feb 2022 16:01:17 +0106 Subject: [PATCH 17/18] serial: 8250: implement write_atomic -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz Implement a non-sleeping NMI-safe write_atomic() console function in order to support atomic console printing during a panic. 
@@ -36,7 +36,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h -@@ -132,12 +132,74 @@ static inline void serial_dl_write(struc +@@ -157,12 +157,74 @@ static inline void serial_dl_write(struc up->dl_write(up, value); } @@ -112,7 +112,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return true; } -@@ -146,7 +208,7 @@ static inline bool serial8250_clear_THRI +@@ -171,7 +233,7 @@ static inline bool serial8250_clear_THRI if (!(up->ier & UART_IER_THRI)) return false; up->ier &= ~UART_IER_THRI; @@ -211,7 +211,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> return HRTIMER_NORESTART; --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c -@@ -255,8 +255,11 @@ static void serial8250_timeout(struct ti +@@ -254,8 +254,11 @@ static void serial8250_timeout(struct ti static void serial8250_backup_timeout(struct timer_list *t) { struct uart_8250_port *up = from_timer(up, t, timer); @@ -223,7 +223,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> spin_lock_irqsave(&up->port.lock, flags); -@@ -265,8 +268,16 @@ static void serial8250_backup_timeout(st +@@ -264,8 +267,16 @@ static void serial8250_backup_timeout(st * based handler. 
*/ if (up->port.irq) { @@ -240,7 +240,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } iir = serial_in(up, UART_IIR); -@@ -290,7 +301,7 @@ static void serial8250_backup_timeout(st +@@ -289,7 +300,7 @@ static void serial8250_backup_timeout(st serial8250_tx_chars(up); if (up->port.irq) @@ -249,7 +249,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> spin_unlock_irqrestore(&up->port.lock, flags); -@@ -567,6 +578,14 @@ serial8250_register_ports(struct uart_dr +@@ -566,6 +577,14 @@ serial8250_register_ports(struct uart_dr #ifdef CONFIG_SERIAL_8250_CONSOLE @@ -264,7 +264,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> static void univ8250_console_write(struct console *co, const char *s, unsigned int count) { -@@ -660,6 +679,7 @@ static int univ8250_console_match(struct +@@ -659,6 +678,7 @@ static int univ8250_console_match(struct static struct console univ8250_console = { .name = "ttyS", @@ -272,7 +272,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> .write = univ8250_console_write, .device = uart_console_device, .setup = univ8250_console_setup, -@@ -953,7 +973,7 @@ static void serial_8250_overrun_backoff_ +@@ -952,7 +972,7 @@ static void serial_8250_overrun_backoff_ spin_lock_irqsave(&port->lock, flags); up->ier |= UART_IER_RLSI | UART_IER_RDI; up->port.read_status_mask |= UART_LSR_DR; @@ -334,7 +334,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> value |= UART_MCR_MDCE | UART_MCR_FCM; --- a/drivers/tty/serial/8250/8250_mtk.c +++ b/drivers/tty/serial/8250/8250_mtk.c -@@ -218,12 +218,40 @@ static void mtk8250_shutdown(struct uart +@@ -222,12 +222,40 @@ static void mtk8250_shutdown(struct uart static void mtk8250_disable_intrs(struct uart_8250_port *up, int mask) { @@ -468,7 +468,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c -@@ -770,7 +770,7 @@ static void 
serial8250_set_sleep(struct +@@ -749,7 +749,7 @@ static void serial8250_set_sleep(struct serial_out(p, UART_EFR, UART_EFR_ECB); serial_out(p, UART_LCR, 0); } @@ -477,7 +477,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (p->capabilities & UART_CAP_EFR) { serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); serial_out(p, UART_EFR, efr); -@@ -1044,8 +1044,11 @@ static int broken_efr(struct uart_8250_p +@@ -1023,8 +1023,11 @@ static int broken_efr(struct uart_8250_p */ static void autoconfig_16550a(struct uart_8250_port *up) { @@ -489,7 +489,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> up->port.type = PORT_16550A; up->capabilities |= UART_CAP_FIFO; -@@ -1156,6 +1159,11 @@ static void autoconfig_16550a(struct uar +@@ -1135,6 +1138,11 @@ static void autoconfig_16550a(struct uar return; } @@ -501,7 +501,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Try writing and reading the UART_IER_UUE bit (b6). * If it works, this is probably one of the Xscale platform's -@@ -1191,6 +1199,9 @@ static void autoconfig_16550a(struct uar +@@ -1170,6 +1178,9 @@ static void autoconfig_16550a(struct uar } serial_out(up, UART_IER, iersave); @@ -511,7 +511,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * We distinguish between 16550A and U6 16550A by counting * how many bytes are in the FIFO. 
-@@ -1213,8 +1224,10 @@ static void autoconfig(struct uart_8250_ +@@ -1192,8 +1203,10 @@ static void autoconfig(struct uart_8250_ unsigned char status1, scratch, scratch2, scratch3; unsigned char save_lcr, save_mcr; struct uart_port *port = &up->port; @@ -522,7 +522,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (!port->iobase && !port->mapbase && !port->membase) return; -@@ -1232,6 +1245,11 @@ static void autoconfig(struct uart_8250_ +@@ -1211,6 +1224,11 @@ static void autoconfig(struct uart_8250_ up->bugs = 0; if (!(port->flags & UPF_BUGGY_UART)) { @@ -534,7 +534,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Do a simple existence test first; if we fail this, * there's no point trying anything else. -@@ -1261,6 +1279,10 @@ static void autoconfig(struct uart_8250_ +@@ -1240,6 +1258,10 @@ static void autoconfig(struct uart_8250_ #endif scratch3 = serial_in(up, UART_IER) & 0x0f; serial_out(up, UART_IER, scratch); @@ -545,7 +545,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (scratch2 != 0 || scratch3 != 0x0F) { /* * We failed; there's nothing here -@@ -1358,10 +1380,7 @@ static void autoconfig(struct uart_8250_ +@@ -1337,10 +1359,7 @@ static void autoconfig(struct uart_8250_ serial8250_out_MCR(up, save_mcr); serial8250_clear_fifos(up); serial_in(up, UART_RX); @@ -557,7 +557,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> out_unlock: spin_unlock_irqrestore(&port->lock, flags); -@@ -1387,7 +1406,9 @@ static void autoconfig_irq(struct uart_8 +@@ -1366,7 +1385,9 @@ static void autoconfig_irq(struct uart_8 unsigned char save_mcr, save_ier; unsigned char save_ICP = 0; unsigned int ICP = 0; @@ -567,7 +567,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int irq; if (port->flags & UPF_FOURPORT) { -@@ -1397,8 +1418,12 @@ static void autoconfig_irq(struct uart_8 +@@ -1376,8 +1397,12 @@ static void autoconfig_irq(struct uart_8 inb_p(ICP); } @@ -581,7 
+581,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* forget possible initially masked and pending IRQ */ probe_irq_off(probe_irq_on()); -@@ -1430,8 +1455,10 @@ static void autoconfig_irq(struct uart_8 +@@ -1409,8 +1434,10 @@ static void autoconfig_irq(struct uart_8 if (port->flags & UPF_FOURPORT) outb_p(save_ICP, ICP); @@ -593,7 +593,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> port->irq = (irq > 0) ? irq : 0; } -@@ -1444,7 +1471,7 @@ static void serial8250_stop_rx(struct ua +@@ -1423,7 +1450,7 @@ static void serial8250_stop_rx(struct ua up->ier &= ~(UART_IER_RLSI | UART_IER_RDI); up->port.read_status_mask &= ~UART_LSR_DR; @@ -602,7 +602,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> serial8250_rpm_put(up); } -@@ -1474,7 +1501,7 @@ void serial8250_em485_stop_tx(struct uar +@@ -1453,7 +1480,7 @@ void serial8250_em485_stop_tx(struct uar serial8250_clear_and_reinit_fifos(p); p->ier |= UART_IER_RLSI | UART_IER_RDI; @@ -611,7 +611,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } } EXPORT_SYMBOL_GPL(serial8250_em485_stop_tx); -@@ -1711,7 +1738,7 @@ static void serial8250_disable_ms(struct +@@ -1709,7 +1736,7 @@ static void serial8250_disable_ms(struct mctrl_gpio_disable_ms(up->gpios); up->ier &= ~UART_IER_MSI; @@ -620,7 +620,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static void serial8250_enable_ms(struct uart_port *port) -@@ -1727,7 +1754,7 @@ static void serial8250_enable_ms(struct +@@ -1725,7 +1752,7 @@ static void serial8250_enable_ms(struct up->ier |= UART_IER_MSI; serial8250_rpm_get(up); @@ -629,7 +629,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> serial8250_rpm_put(up); } -@@ -2146,14 +2173,7 @@ static void serial8250_put_poll_char(str +@@ -2154,14 +2181,7 @@ static void serial8250_put_poll_char(str struct uart_8250_port *up = up_to_u8250p(port); serial8250_rpm_get(up); @@ -645,7 +645,7 @@ Signed-off-by: Sebastian 
Andrzej Siewior <bigeasy@linutronix.de> wait_for_xmitr(up, BOTH_EMPTY); /* -@@ -2166,7 +2186,7 @@ static void serial8250_put_poll_char(str +@@ -2174,7 +2194,7 @@ static void serial8250_put_poll_char(str * and restore the IER */ wait_for_xmitr(up, BOTH_EMPTY); @@ -654,7 +654,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> serial8250_rpm_put(up); } -@@ -2175,8 +2195,10 @@ static void serial8250_put_poll_char(str +@@ -2183,8 +2203,10 @@ static void serial8250_put_poll_char(str int serial8250_do_startup(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); @@ -665,7 +665,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int retval; if (!port->fifosize) -@@ -2196,7 +2218,7 @@ int serial8250_do_startup(struct uart_po +@@ -2204,7 +2226,7 @@ int serial8250_do_startup(struct uart_po up->acr = 0; serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); serial_port_out(port, UART_EFR, UART_EFR_ECB); @@ -674,7 +674,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> serial_port_out(port, UART_LCR, 0); serial_icr_write(up, UART_CSR, 0); /* Reset the UART */ serial_port_out(port, UART_LCR, UART_LCR_CONF_MODE_B); -@@ -2206,7 +2228,7 @@ int serial8250_do_startup(struct uart_po +@@ -2214,7 +2236,7 @@ int serial8250_do_startup(struct uart_po if (port->type == PORT_DA830) { /* Reset the port */ @@ -683,7 +683,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> serial_port_out(port, UART_DA830_PWREMU_MGMT, 0); mdelay(10); -@@ -2301,6 +2323,8 @@ int serial8250_do_startup(struct uart_po +@@ -2309,6 +2331,8 @@ int serial8250_do_startup(struct uart_po if (port->irq && (up->port.flags & UPF_SHARE_IRQ)) up->port.irqflags |= IRQF_SHARED; @@ -692,7 +692,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (port->irq && !(up->port.flags & UPF_NO_THRE_TEST)) { unsigned char iir1; -@@ -2317,6 +2341,9 @@ int serial8250_do_startup(struct uart_po +@@ -2325,6 +2349,9 @@ int 
serial8250_do_startup(struct uart_po */ spin_lock_irqsave(&port->lock, flags); @@ -702,7 +702,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> wait_for_xmitr(up, UART_LSR_THRE); serial_port_out_sync(port, UART_IER, UART_IER_THRI); udelay(1); /* allow THRE to set */ -@@ -2327,6 +2354,9 @@ int serial8250_do_startup(struct uart_po +@@ -2335,6 +2362,9 @@ int serial8250_do_startup(struct uart_po iir = serial_port_in(port, UART_IIR); serial_port_out(port, UART_IER, 0); @@ -712,7 +712,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> spin_unlock_irqrestore(&port->lock, flags); if (port->irqflags & IRQF_SHARED) -@@ -2383,10 +2413,14 @@ int serial8250_do_startup(struct uart_po +@@ -2391,10 +2421,14 @@ int serial8250_do_startup(struct uart_po * Do a quick test to see if we receive an interrupt when we enable * the TX irq. */ @@ -727,7 +727,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> if (lsr & UART_LSR_TEMT && iir & UART_IIR_NO_INT) { if (!(up->bugs & UART_BUG_TXEN)) { -@@ -2418,7 +2452,7 @@ int serial8250_do_startup(struct uart_po +@@ -2426,7 +2460,7 @@ int serial8250_do_startup(struct uart_po if (up->dma) { const char *msg = NULL; @@ -736,7 +736,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> msg = "forbid DMA for kernel console"; else if (serial8250_request_dma(up)) msg = "failed to request DMA"; -@@ -2469,7 +2503,7 @@ void serial8250_do_shutdown(struct uart_ +@@ -2477,7 +2511,7 @@ void serial8250_do_shutdown(struct uart_ */ spin_lock_irqsave(&port->lock, flags); up->ier = 0; @@ -745,7 +745,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> spin_unlock_irqrestore(&port->lock, flags); synchronize_irq(port->irq); -@@ -2837,7 +2871,7 @@ serial8250_do_set_termios(struct uart_po +@@ -2843,7 +2877,7 @@ serial8250_do_set_termios(struct uart_po if (up->capabilities & UART_CAP_RTOIE) up->ier |= UART_IER_RTOIE; @@ -754,7 +754,7 @@ Signed-off-by: Sebastian Andrzej Siewior 
<bigeasy@linutronix.de> if (up->capabilities & UART_CAP_EFR) { unsigned char efr = 0; -@@ -3303,7 +3337,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default +@@ -3309,7 +3343,7 @@ EXPORT_SYMBOL_GPL(serial8250_set_default #ifdef CONFIG_SERIAL_8250_CONSOLE @@ -763,7 +763,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> { struct uart_8250_port *up = up_to_u8250p(port); -@@ -3311,6 +3345,18 @@ static void serial8250_console_putchar(s +@@ -3317,6 +3351,18 @@ static void serial8250_console_putchar(s serial_port_out(port, UART_TX, ch); } @@ -782,7 +782,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Restore serial console when h/w power-off detected */ -@@ -3332,6 +3378,32 @@ static void serial8250_console_restore(s +@@ -3338,6 +3384,32 @@ static void serial8250_console_restore(s serial8250_out_MCR(up, up->mcr | UART_MCR_DTR | UART_MCR_RTS); } @@ -813,12 +813,12 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +} + /* - * Print a string to the serial port trying not to disturb - * any possible real use of the port... 
-@@ -3348,24 +3420,12 @@ void serial8250_console_write(struct uar + * Print a string to the serial port using the device FIFO + * +@@ -3383,24 +3455,12 @@ void serial8250_console_write(struct uar struct uart_port *port = &up->port; unsigned long flags; - unsigned int ier; + unsigned int ier, use_fifo; - int locked = 1; touch_nmi_watchdog(); @@ -842,17 +842,20 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* check scratch reg to see if port powered off during system sleep */ if (up->canary && (up->canary != serial_port_in(port, UART_SCR))) { -@@ -3379,7 +3439,9 @@ void serial8250_console_write(struct uar - mdelay(port->rs485.delay_rts_before_send); - } +@@ -3434,10 +3494,12 @@ void serial8250_console_write(struct uar + */ + !(up->port.flags & UPF_CONS_FLOW); + atomic_inc(&up->console_printing); - uart_console_write(port, s, count, serial8250_console_putchar); + if (likely(use_fifo)) + serial8250_console_fifo_write(up, s, count); + else + uart_console_write(port, s, count, serial8250_console_putchar); + atomic_dec(&up->console_printing); /* * Finally, wait for transmitter to become empty -@@ -3392,8 +3454,7 @@ void serial8250_console_write(struct uar +@@ -3450,8 +3512,7 @@ void serial8250_console_write(struct uar if (em485->tx_stopped) up->rs485_stop_tx(up); } @@ -862,7 +865,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * The receive handling will happen properly because the -@@ -3405,8 +3466,7 @@ void serial8250_console_write(struct uar +@@ -3463,8 +3524,7 @@ void serial8250_console_write(struct uar if (up->msr_saved_flags) serial8250_modem_status(up); @@ -872,7 +875,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } static unsigned int probe_baud(struct uart_port *port) -@@ -3426,6 +3486,7 @@ static unsigned int probe_baud(struct ua +@@ -3484,6 +3544,7 @@ static unsigned int probe_baud(struct ua int serial8250_console_setup(struct uart_port *port, char *options, bool probe) { @@ -880,7 +883,7 @@ 
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> int baud = 9600; int bits = 8; int parity = 'n'; -@@ -3435,6 +3496,8 @@ int serial8250_console_setup(struct uart +@@ -3493,6 +3554,8 @@ int serial8250_console_setup(struct uart if (!port->iobase && !port->membase) return -ENODEV; diff --git a/debian/patches-rt/0018-printk-avoid-preempt_disable-for-PREEMPT_RT.patch b/debian/patches-rt/0018-printk-avoid-preempt_disable-for-PREEMPT_RT.patch index de4f5d1f2..a578ee14f 100644 --- a/debian/patches-rt/0018-printk-avoid-preempt_disable-for-PREEMPT_RT.patch +++ b/debian/patches-rt/0018-printk-avoid-preempt_disable-for-PREEMPT_RT.patch @@ -1,7 +1,7 @@ From: John Ogness <john.ogness@linutronix.de> Date: Fri, 4 Feb 2022 16:01:17 +0106 Subject: [PATCH 18/18] printk: avoid preempt_disable() for PREEMPT_RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz During non-normal operation, printk() calls will attempt to write the messages directly to the consoles. 
This involves @@ -31,7 +31,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c -@@ -1956,6 +1956,7 @@ static int console_lock_spinning_disable +@@ -1984,6 +1984,7 @@ static int console_lock_spinning_disable return 1; } @@ -39,7 +39,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /** * console_trylock_spinning - try to get console_lock by busy waiting * -@@ -2029,6 +2030,7 @@ static int console_trylock_spinning(void +@@ -2057,6 +2058,7 @@ static int console_trylock_spinning(void return 1; } @@ -47,7 +47,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * Call the specified console driver, asking it to write out the specified -@@ -2368,6 +2370,18 @@ asmlinkage int vprintk_emit(int facility +@@ -2396,6 +2398,18 @@ asmlinkage int vprintk_emit(int facility /* If called from the scheduler, we can not call up(). */ if (!in_sched && allow_direct_printing()) { @@ -66,7 +66,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> /* * The caller may be holding system-critical or * timing-sensitive locks. Disable preemption during direct -@@ -2385,6 +2399,7 @@ asmlinkage int vprintk_emit(int facility +@@ -2413,6 +2427,7 @@ asmlinkage int vprintk_emit(int facility if (console_trylock_spinning()) console_unlock(); preempt_enable(); @@ -74,7 +74,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> } wake_up_klogd(); -@@ -3075,8 +3090,12 @@ static bool console_emit_next_record_tra +@@ -3103,8 +3118,12 @@ static bool console_emit_next_record_tra /* * Handovers are only supported if threaded printers are atomically * blocked. The context taking over the console_lock may be atomic. 
diff --git a/debian/patches-rt/ARM64__Allow_to_enable_RT.patch b/debian/patches-rt/ARM64__Allow_to_enable_RT.patch index c7567eda9..7896cc500 100644 --- a/debian/patches-rt/ARM64__Allow_to_enable_RT.patch +++ b/debian/patches-rt/ARM64__Allow_to_enable_RT.patch @@ -1,7 +1,7 @@ Subject: ARM64: Allow to enable RT From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Fri Oct 11 13:14:35 2019 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> @@ -17,10 +17,10 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -91,6 +91,7 @@ config ARM64 - select ARCH_SUPPORTS_ATOMIC_RMW +@@ -93,6 +93,7 @@ config ARM64 select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 select ARCH_SUPPORTS_NUMA_BALANCING + select ARCH_SUPPORTS_PAGE_TABLE_CHECK + select ARCH_SUPPORTS_RT select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT select ARCH_WANT_DEFAULT_BPF_JIT diff --git a/debian/patches-rt/ARM__Allow_to_enable_RT.patch b/debian/patches-rt/ARM__Allow_to_enable_RT.patch index 9a1aa019f..b7aa1be17 100644 --- a/debian/patches-rt/ARM__Allow_to_enable_RT.patch +++ b/debian/patches-rt/ARM__Allow_to_enable_RT.patch @@ -1,7 +1,7 @@ Subject: ARM: Allow to enable RT From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Fri Oct 11 13:14:29 2019 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> diff --git a/debian/patches-rt/ARM__enable_irq_in_translation_section_permission_fault_handlers.patch b/debian/patches-rt/ARM__enable_irq_in_translation_section_permission_fault_handlers.patch index 0aa0f2287..e63562b46 100644 --- 
a/debian/patches-rt/ARM__enable_irq_in_translation_section_permission_fault_handlers.patch +++ b/debian/patches-rt/ARM__enable_irq_in_translation_section_permission_fault_handlers.patch @@ -1,7 +1,7 @@ Subject: ARM: enable irq in translation/section permission fault handlers From: Yadi.hu <yadi.hu@windriver.com> Date: Wed Dec 10 10:32:09 2014 +0800 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Yadi.hu <yadi.hu@windriver.com> diff --git a/debian/patches-rt/Add_localversion_for_-RT_release.patch b/debian/patches-rt/Add_localversion_for_-RT_release.patch index dd5aded96..1ba84649a 100644 --- a/debian/patches-rt/Add_localversion_for_-RT_release.patch +++ b/debian/patches-rt/Add_localversion_for_-RT_release.patch @@ -1,7 +1,7 @@ Subject: Add localversion for -RT release From: Thomas Gleixner <tglx@linutronix.de> Date: Fri Jul 8 20:25:16 2011 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Thomas Gleixner <tglx@linutronix.de> @@ -16,4 +16,4 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- /dev/null +++ b/localversion-rt @@ -0,0 +1 @@ -+-rt11 ++-rt5 diff --git a/debian/patches-rt/POWERPC__Allow_to_enable_RT.patch b/debian/patches-rt/POWERPC__Allow_to_enable_RT.patch index 74b00215e..622fa8a9f 100644 --- a/debian/patches-rt/POWERPC__Allow_to_enable_RT.patch +++ b/debian/patches-rt/POWERPC__Allow_to_enable_RT.patch @@ -1,7 +1,7 @@ Subject: POWERPC: Allow to enable RT From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Fri Oct 11 13:14:41 2019 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz 
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -149,6 +149,7 @@ config PPC +@@ -150,6 +150,7 @@ config PPC select ARCH_STACKWALK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_DEBUG_PAGEALLOC if PPC_BOOK3S || PPC_8xx || 40x @@ -25,11 +25,11 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF if PPC64 select ARCH_USE_MEMTEST -@@ -219,6 +220,7 @@ config PPC - select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select HAVE_IOREMAP_PROT - select HAVE_IRQ_TIME_ACCOUNTING +@@ -243,6 +244,7 @@ config PPC + select HAVE_PREEMPT_LAZY + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_RELIABLE_STACKTRACE + select HAVE_POSIX_CPU_TIMERS_TASK_WORK if !KVM - select HAVE_KERNEL_GZIP - select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE - select HAVE_KERNEL_LZO if DEFAULT_UIMAGE + select HAVE_RSEQ + select HAVE_SETUP_PER_CPU_AREA if PPC64 + select HAVE_SOFTIRQ_ON_OWN_STACK diff --git a/debian/patches-rt/Revert-drm-i915-Depend-on-PREEMPT_RT.patch b/debian/patches-rt/Revert-drm-i915-Depend-on-PREEMPT_RT.patch index 1f6997d8e..609ccd5a9 100644 --- a/debian/patches-rt/Revert-drm-i915-Depend-on-PREEMPT_RT.patch +++ b/debian/patches-rt/Revert-drm-i915-Depend-on-PREEMPT_RT.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Mon, 21 Feb 2022 17:59:14 +0100 Subject: [PATCH] Revert "drm/i915: Depend on !PREEMPT_RT." -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz Once the known issues are addressed, it should be safe to enable the driver. 
@@ -18,6 +18,6 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> depends on DRM depends on X86 && PCI - depends on !PREEMPT_RT - select INTEL_GTT + select INTEL_GTT if X86 select INTERVAL_TREE # we need shmfs for the swappable backing store, and in particular diff --git a/debian/patches-rt/SUNRPC-Don-t-disable-preemption-while-calling-svc_po.patch b/debian/patches-rt/SUNRPC-Don-t-disable-preemption-while-calling-svc_po.patch deleted file mode 100644 index f7a081459..000000000 --- a/debian/patches-rt/SUNRPC-Don-t-disable-preemption-while-calling-svc_po.patch +++ /dev/null @@ -1,56 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 4 May 2022 19:24:10 +0200 -Subject: [PATCH] SUNRPC: Don't disable preemption while calling - svc_pool_for_cpu(). -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -svc_xprt_enqueue() disables preemption via get_cpu() and then asks for a -pool of a specific CPU (current) via svc_pool_for_cpu(). -With disabled preemption it acquires svc_pool::sp_lock, a spinlock_t, -which is a sleeping lock on PREEMPT_RT and can't be acquired with -disabled preemption. - -Disabling preemption is not required here. The pool is protected with a -lock so the following list access is safe even cross-CPU. The following -iteration through svc_pool::sp_all_threads is under RCU-readlock and -remaining operations within the loop are atomic and do not rely on -disabled-preemption. - -Use raw_smp_processor_id() as the argument for the requested CPU in -svc_pool_for_cpu(). 
- -Reported-by: Mike Galbraith <umgwanakikbuti@gmail.com> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/YnK2ujabd2+oCrT/@linutronix.de ---- - net/sunrpc/svc_xprt.c | 5 +---- - 1 file changed, 1 insertion(+), 4 deletions(-) - ---- a/net/sunrpc/svc_xprt.c -+++ b/net/sunrpc/svc_xprt.c -@@ -448,7 +448,6 @@ void svc_xprt_enqueue(struct svc_xprt *x - { - struct svc_pool *pool; - struct svc_rqst *rqstp = NULL; -- int cpu; - - if (!svc_xprt_ready(xprt)) - return; -@@ -461,8 +460,7 @@ void svc_xprt_enqueue(struct svc_xprt *x - if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) - return; - -- cpu = get_cpu(); -- pool = svc_pool_for_cpu(xprt->xpt_server, cpu); -+ pool = svc_pool_for_cpu(xprt->xpt_server, raw_smp_processor_id()); - - atomic_long_inc(&pool->sp_stats.packets); - -@@ -485,7 +483,6 @@ void svc_xprt_enqueue(struct svc_xprt *x - rqstp = NULL; - out_unlock: - rcu_read_unlock(); -- put_cpu(); - trace_svc_xprt_enqueue(xprt, rqstp); - } - EXPORT_SYMBOL_GPL(svc_xprt_enqueue); diff --git a/debian/patches-rt/arch-Disable-softirq-stacks-on-PREEMPT_RT.patch b/debian/patches-rt/arch-Disable-softirq-stacks-on-PREEMPT_RT.patch new file mode 100644 index 000000000..afafc7eca --- /dev/null +++ b/debian/patches-rt/arch-Disable-softirq-stacks-on-PREEMPT_RT.patch @@ -0,0 +1,151 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Tue, 14 Jun 2022 20:18:14 +0200 +Subject: [PATCH] arch/*: Disable softirq stacks on PREEMPT_RT. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz + +PREEMPT_RT preempts softirqs and the current implementation avoids +do_softirq_own_stack() and only uses __do_softirq(). + +Disable the unused softirqs stacks on PREEMPT_RT to save some memory and +ensure that do_softirq_own_stack() is not used bwcause it is not expected. 
+ +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Signed-off-by: Arnd Bergmann <arnd@arndb.de> +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Link: https://lkml.kernel.org/r/CAK8P3a1QmeAscV-Ory-Dae4RoLvDSPEjEgFGQHR9U8jUervGuA@mail.gmail.com +--- + arch/arm/kernel/irq.c | 3 ++- + arch/parisc/kernel/irq.c | 2 ++ + arch/powerpc/kernel/irq.c | 4 ++++ + arch/s390/include/asm/softirq_stack.h | 3 ++- + arch/sh/kernel/irq.c | 2 ++ + arch/sparc/kernel/irq_64.c | 2 ++ + include/asm-generic/softirq_stack.h | 2 +- + 7 files changed, 15 insertions(+), 3 deletions(-) + +--- a/arch/arm/kernel/irq.c ++++ b/arch/arm/kernel/irq.c +@@ -70,6 +70,7 @@ static void __init init_irq_stacks(void) + } + } + ++#ifndef CONFIG_PREEMPT_RT + static void ____do_softirq(void *arg) + { + __do_softirq(); +@@ -80,7 +81,7 @@ void do_softirq_own_stack(void) + call_with_stack(____do_softirq, NULL, + __this_cpu_read(irq_stack_ptr)); + } +- ++#endif + #endif + + int arch_show_interrupts(struct seq_file *p, int prec) +--- a/arch/parisc/kernel/irq.c ++++ b/arch/parisc/kernel/irq.c +@@ -480,10 +480,12 @@ static void execute_on_irq_stack(void *f + *irq_stack_in_use = 1; + } + ++#ifndef CONFIG_PREEMPT_RT + void do_softirq_own_stack(void) + { + execute_on_irq_stack(__do_softirq, 0); + } ++#endif + #endif /* CONFIG_IRQSTACKS */ + + /* ONLY called from entry.S:intr_extint() */ +--- a/arch/powerpc/kernel/irq.c ++++ b/arch/powerpc/kernel/irq.c +@@ -611,6 +611,7 @@ static inline void check_stack_overflow( + } + } + ++#ifndef CONFIG_PREEMPT_RT + static __always_inline void call_do_softirq(const void *sp) + { + /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. 
*/ +@@ -629,6 +630,7 @@ static __always_inline void call_do_soft + "r11", "r12" + ); + } ++#endif + + static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) + { +@@ -747,10 +749,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_most + void *softirq_ctx[NR_CPUS] __read_mostly; + void *hardirq_ctx[NR_CPUS] __read_mostly; + ++#ifndef CONFIG_PREEMPT_RT + void do_softirq_own_stack(void) + { + call_do_softirq(softirq_ctx[smp_processor_id()]); + } ++#endif + + irq_hw_number_t virq_to_hw(unsigned int virq) + { +--- a/arch/s390/include/asm/softirq_stack.h ++++ b/arch/s390/include/asm/softirq_stack.h +@@ -5,9 +5,10 @@ + #include <asm/lowcore.h> + #include <asm/stacktrace.h> + ++#ifndef CONFIG_PREEMPT_RT + static inline void do_softirq_own_stack(void) + { + call_on_stack(0, S390_lowcore.async_stack, void, __do_softirq); + } +- ++#endif + #endif /* __ASM_S390_SOFTIRQ_STACK_H */ +--- a/arch/sh/kernel/irq.c ++++ b/arch/sh/kernel/irq.c +@@ -149,6 +149,7 @@ void irq_ctx_exit(int cpu) + hardirq_ctx[cpu] = NULL; + } + ++#ifndef CONFIG_PREEMPT_RT + void do_softirq_own_stack(void) + { + struct thread_info *curctx; +@@ -176,6 +177,7 @@ void do_softirq_own_stack(void) + "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" + ); + } ++#endif + #else + static inline void handle_one_irq(unsigned int irq) + { +--- a/arch/sparc/kernel/irq_64.c ++++ b/arch/sparc/kernel/irq_64.c +@@ -855,6 +855,7 @@ void __irq_entry handler_irq(int pil, st + set_irq_regs(old_regs); + } + ++#ifndef CONFIG_PREEMPT_RT + void do_softirq_own_stack(void) + { + void *orig_sp, *sp = softirq_stack[smp_processor_id()]; +@@ -869,6 +870,7 @@ void do_softirq_own_stack(void) + __asm__ __volatile__("mov %0, %%sp" + : : "r" (orig_sp)); + } ++#endif + + #ifdef CONFIG_HOTPLUG_CPU + void fixup_irqs(void) +--- a/include/asm-generic/softirq_stack.h ++++ b/include/asm-generic/softirq_stack.h +@@ -2,7 +2,7 @@ + #ifndef __ASM_GENERIC_SOFTIRQ_STACK_H + #define __ASM_GENERIC_SOFTIRQ_STACK_H + +-#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK 
++#if defined(CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK) && !defined(CONFIG_PREEMPT_RT) + void do_softirq_own_stack(void); + #else + static inline void do_softirq_own_stack(void) diff --git a/debian/patches-rt/arch_arm64__Add_lazy_preempt_support.patch b/debian/patches-rt/arch_arm64__Add_lazy_preempt_support.patch index 979a7898e..3246ad697 100644 --- a/debian/patches-rt/arch_arm64__Add_lazy_preempt_support.patch +++ b/debian/patches-rt/arch_arm64__Add_lazy_preempt_support.patch @@ -1,7 +1,7 @@ Subject: arch/arm64: Add lazy preempt support From: Anders Roxell <anders.roxell@linaro.org> Date: Thu May 14 17:52:17 2015 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Anders Roxell <anders.roxell@linaro.org> @@ -26,7 +26,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig -@@ -194,6 +194,7 @@ config ARM64 +@@ -197,6 +197,7 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API @@ -92,7 +92,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TIF_SYSCALL_TRACE 8 /* syscall trace active */ #define TIF_SYSCALL_AUDIT 9 /* syscall auditing */ #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ -@@ -98,8 +100,10 @@ int arch_dup_task_struct(struct task_str +@@ -100,8 +102,10 @@ int arch_dup_task_struct(struct task_str #define _TIF_SVE (1 << TIF_SVE) #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) @@ -104,7 +104,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ _TIF_UPROBE | _TIF_MTE_ASYNC_FAULT | \ _TIF_NOTIFY_SIGNAL) -@@ -108,6 +112,8 @@ int arch_dup_task_struct(struct task_str +@@ -110,6 +114,8 @@ int arch_dup_task_struct(struct task_str _TIF_SYSCALL_TRACEPOINT | 
_TIF_SECCOMP | \ _TIF_SYSCALL_EMU) @@ -125,7 +125,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c -@@ -921,7 +921,7 @@ static void do_signal(struct pt_regs *re +@@ -1089,7 +1089,7 @@ static void do_signal(struct pt_regs *re void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags) { do { diff --git a/debian/patches-rt/arm64-mm-Make-arch_faults_on_old_pte-check-for-migra.patch b/debian/patches-rt/arm64-mm-Make-arch_faults_on_old_pte-check-for-migra.patch deleted file mode 100644 index 70d6eb43e..000000000 --- a/debian/patches-rt/arm64-mm-Make-arch_faults_on_old_pte-check-for-migra.patch +++ /dev/null @@ -1,37 +0,0 @@ -From: Valentin Schneider <valentin.schneider@arm.com> -Date: Thu, 27 Jan 2022 19:24:37 +0000 -Subject: [PATCH] arm64: mm: Make arch_faults_on_old_pte() check for - migratability -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -arch_faults_on_old_pte() relies on the calling context being -non-preemptible. CONFIG_PREEMPT_RT turns the PTE lock into a sleepable -spinlock, which doesn't disable preemption once acquired, triggering the -warning in arch_faults_on_old_pte(). - -It does however disable migration, ensuring the task remains on the same -CPU during the entirety of the critical section, making the read of -cpu_has_hw_af() safe and stable. - -Make arch_faults_on_old_pte() check cant_migrate() instead of preemptible(). 
- -Suggested-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Valentin Schneider <valentin.schneider@arm.com> -Link: https://lore.kernel.org/r/20220127192437.1192957-1-valentin.schneider@arm.com -Acked-by: Catalin Marinas <catalin.marinas@arm.com> ---- - arch/arm64/include/asm/pgtable.h | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - ---- a/arch/arm64/include/asm/pgtable.h -+++ b/arch/arm64/include/asm/pgtable.h -@@ -1001,7 +1001,8 @@ static inline void update_mmu_cache(stru - */ - static inline bool arch_faults_on_old_pte(void) - { -- WARN_ON(preemptible()); -+ /* The register read below requires a stable CPU to make any sense */ -+ cant_migrate(); - - return !cpu_has_hw_af(); - } diff --git a/debian/patches-rt/arm64-sve-Delay-freeing-memory-in-fpsimd_flush_threa.patch b/debian/patches-rt/arm64-sve-Delay-freeing-memory-in-fpsimd_flush_threa.patch deleted file mode 100644 index 9c9848c6c..000000000 --- a/debian/patches-rt/arm64-sve-Delay-freeing-memory-in-fpsimd_flush_threa.patch +++ /dev/null @@ -1,44 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 29 Jul 2021 12:52:14 +0200 -Subject: [PATCH] arm64/sve: Delay freeing memory in fpsimd_flush_thread() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -fpsimd_flush_thread() invokes kfree() via sve_free() within a preempt disabled -section which is not working on -RT. - -Delay freeing of memory until preemption is enabled again. 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/arm64/kernel/fpsimd.c | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - ---- a/arch/arm64/kernel/fpsimd.c -+++ b/arch/arm64/kernel/fpsimd.c -@@ -1141,6 +1141,8 @@ static void fpsimd_flush_thread_vl(enum - - void fpsimd_flush_thread(void) - { -+ void *sve_state = NULL; -+ - if (!system_supports_fpsimd()) - return; - -@@ -1152,11 +1154,16 @@ void fpsimd_flush_thread(void) - - if (system_supports_sve()) { - clear_thread_flag(TIF_SVE); -- sve_free(current); -+ -+ /* Defer kfree() while in atomic context */ -+ sve_state = current->thread.sve_state; -+ current->thread.sve_state = NULL; -+ - fpsimd_flush_thread_vl(ARM64_VEC_SVE); - } - - put_cpu_fpsimd_context(); -+ kfree(sve_state); - } - - /* diff --git a/debian/patches-rt/arm64-sve-Make-kernel-FPU-protection-RT-friendly.patch b/debian/patches-rt/arm64-sve-Make-kernel-FPU-protection-RT-friendly.patch deleted file mode 100644 index 948e85745..000000000 --- a/debian/patches-rt/arm64-sve-Make-kernel-FPU-protection-RT-friendly.patch +++ /dev/null @@ -1,57 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 29 Jul 2021 10:36:30 +0200 -Subject: [PATCH] arm64/sve: Make kernel FPU protection RT friendly -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -Non RT kernels need to protect FPU against preemption and bottom half -processing. This is achieved by disabling bottom halves via -local_bh_disable() which implictly disables preemption. - -On RT kernels this protection mechanism is not sufficient because -local_bh_disable() does not disable preemption. It serializes bottom half -related processing via a CPU local lock. - -As bottom halves are running always in thread context on RT kernels -disabling preemption is the proper choice as it implicitly prevents bottom -half processing. 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/arm64/kernel/fpsimd.c | 16 ++++++++++++++-- - 1 file changed, 14 insertions(+), 2 deletions(-) - ---- a/arch/arm64/kernel/fpsimd.c -+++ b/arch/arm64/kernel/fpsimd.c -@@ -206,10 +206,19 @@ static void __get_cpu_fpsimd_context(voi - * - * The double-underscore version must only be called if you know the task - * can't be preempted. -+ * -+ * On RT kernels local_bh_disable() is not sufficient because it only -+ * serializes soft interrupt related sections via a local lock, but stays -+ * preemptible. Disabling preemption is the right choice here as bottom -+ * half processing is always in thread context on RT kernels so it -+ * implicitly prevents bottom half processing as well. - */ - static void get_cpu_fpsimd_context(void) - { -- local_bh_disable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_bh_disable(); -+ else -+ preempt_disable(); - __get_cpu_fpsimd_context(); - } - -@@ -230,7 +239,10 @@ static void __put_cpu_fpsimd_context(voi - static void put_cpu_fpsimd_context(void) - { - __put_cpu_fpsimd_context(); -- local_bh_enable(); -+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) -+ local_bh_enable(); -+ else -+ preempt_enable(); - } - - static bool have_cpu_fpsimd_context(void) diff --git a/debian/patches-rt/arm__Add_support_for_lazy_preemption.patch b/debian/patches-rt/arm__Add_support_for_lazy_preemption.patch index 076b52381..f09bc7f15 100644 --- a/debian/patches-rt/arm__Add_support_for_lazy_preemption.patch +++ b/debian/patches-rt/arm__Add_support_for_lazy_preemption.patch @@ -1,7 +1,7 @@ Subject: arm: Add support for lazy preemption From: Thomas Gleixner <tglx@linutronix.de> Date: Wed Oct 31 12:04:11 2012 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Thomas Gleixner <tglx@linutronix.de> @@ -76,7 +76,7 @@ Signed-off-by: Thomas 
Gleixner <tglx@linutronix.de> DEFINE(TI_CPU_SAVE, offsetof(struct thread_info, cpu_context)); --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S -@@ -224,11 +224,18 @@ ENDPROC(__dabt_svc) +@@ -222,11 +222,18 @@ ENDPROC(__dabt_svc) #ifdef CONFIG_PREEMPTION ldr r8, [tsk, #TI_PREEMPT] @ get preempt count @@ -97,7 +97,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #endif svc_exit r5, irq = 1 @ return from exception -@@ -243,8 +250,14 @@ ENDPROC(__irq_svc) +@@ -241,8 +248,14 @@ ENDPROC(__irq_svc) 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED diff --git a/debian/patches-rt/blk-mq-Don-t-disable-preemption-around-__blk_mq_run_.patch b/debian/patches-rt/blk-mq-Don-t-disable-preemption-around-__blk_mq_run_.patch index 664e26c6c..15f0dc718 100644 --- a/debian/patches-rt/blk-mq-Don-t-disable-preemption-around-__blk_mq_run_.patch +++ b/debian/patches-rt/blk-mq-Don-t-disable-preemption-around-__blk_mq_run_.patch @@ -1,8 +1,8 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Thu, 5 May 2022 19:21:47 +0200 +Date: Wed, 22 Jun 2022 09:42:37 +0200 Subject: [PATCH] blk-mq: Don't disable preemption around __blk_mq_run_hw_queue(). -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz __blk_mq_delay_run_hw_queue() disables preemption to get a stable current CPU number and then invokes __blk_mq_run_hw_queue() if the CPU @@ -11,38 +11,35 @@ number is part the mask. __blk_mq_run_hw_queue() acquires a spin_lock_t which is a sleeping lock on PREEMPT_RT and can't be acquired with disabled preemption. 
-If it is important that the current CPU matches the requested CPU mask -and that the context does not migrate to another CPU while -__blk_mq_run_hw_queue() is invoked then it possible to achieve this by -disabling migration and keeping the context preemptible. +It is not required for correctness to invoke __blk_mq_run_hw_queue() on +a CPU matching hctx->cpumask. Both (async and direct requests) can run +on a CPU not matching hctx->cpumask. -Disable only migration while testing the CPU mask and invoking +The CPU mask without disabling preemption and invoking __blk_mq_run_hw_queue(). Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/YnQHqx/5+54jd+U+@linutronix.de +Link: https://lkml.kernel.org/r/YrLSEiNvagKJaDs5@linutronix.de +Reviewed-by: Ming Lei <ming.lei@redhat.com> --- - block/blk-mq.c | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) + block/blk-mq.c | 6 +----- + 1 file changed, 1 insertion(+), 5 deletions(-) --- a/block/blk-mq.c +++ b/block/blk-mq.c -@@ -2046,14 +2046,14 @@ static void __blk_mq_delay_run_hw_queue( +@@ -2085,14 +2085,10 @@ static void __blk_mq_delay_run_hw_queue( return; if (!async && !(hctx->flags & BLK_MQ_F_BLOCKING)) { - int cpu = get_cpu(); - if (cpumask_test_cpu(cpu, hctx->cpumask)) { -+ migrate_disable(); + if (cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)) { __blk_mq_run_hw_queue(hctx); - put_cpu(); -+ migrate_enable(); return; } - +- - put_cpu(); -+ migrate_enable(); } kblockd_mod_delayed_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work, diff --git a/debian/patches-rt/drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch b/debian/patches-rt/drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch index bc63e12b8..2eb37dd46 100644 --- a/debian/patches-rt/drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch +++ b/debian/patches-rt/drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch @@ -1,22 +1,24 @@ 
-Subject: drivers/block/zram: Replace bit spinlocks with rtmutex for -rt +Subject: zram: Replace bit spinlocks with spinlock_t for PREEMPT_RT. From: Mike Galbraith <umgwanakikbuti@gmail.com> Date: Thu Mar 31 04:08:28 2016 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Mike Galbraith <umgwanakikbuti@gmail.com> -They're nondeterministic, and lead to ___might_sleep() splats in -rt. -OTOH, they're a lot less wasteful than an rtmutex per page. +The bit spinlock disables preemption on PREEMPT_RT. With disabled preemption it +is not allowed to acquire other sleeping locks which includes invoking +zs_free(). + +Use a spinlock_t on PREEMPT_RT for locking and set/ clear ZRAM_LOCK after the +lock has been acquired/ dropped. Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - - +Link: https://lkml.kernel.org/r/YqIbMuHCPiQk+Ac2@linutronix.de --- drivers/block/zram/zram_drv.c | 36 ++++++++++++++++++++++++++++++++++++ - drivers/block/zram/zram_drv.h | 1 + - 2 files changed, 37 insertions(+) + drivers/block/zram/zram_drv.h | 3 +++ + 2 files changed, 39 insertions(+) --- --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -69,7 +71,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static inline bool init_done(struct zram *zram) { -@@ -1194,6 +1229,7 @@ static bool zram_meta_alloc(struct zram +@@ -1195,6 +1230,7 @@ static bool zram_meta_alloc(struct zram if (!huge_class_size) huge_class_size = zs_huge_class_size(zram->mem_pool); @@ -79,11 +81,13 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h -@@ -63,6 +63,7 @@ struct zram_table_entry { +@@ -63,6 +63,9 @@ struct zram_table_entry { unsigned long 
element; }; unsigned long flags; ++#ifdef CONFIG_PREEMPT_RT + spinlock_t lock; ++#endif #ifdef CONFIG_ZRAM_MEMORY_TRACKING ktime_t ac_time; #endif diff --git a/debian/patches-rt/entry--Fix-the-preempt-lazy-fallout.patch b/debian/patches-rt/entry--Fix-the-preempt-lazy-fallout.patch index 9060be307..5e982d82e 100644 --- a/debian/patches-rt/entry--Fix-the-preempt-lazy-fallout.patch +++ b/debian/patches-rt/entry--Fix-the-preempt-lazy-fallout.patch @@ -1,7 +1,7 @@ Subject: entry: Fix the preempt lazy fallout From: Thomas Gleixner <tglx@linutronix.de> Date: Tue, 13 Jul 2021 07:52:52 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz Common code needs common defines.... @@ -15,7 +15,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h -@@ -153,8 +153,6 @@ struct thread_info { +@@ -151,8 +151,6 @@ struct thread_info { #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/debian/patches-rt/fs-dcache-Delay-dentry-d_lock-outside-of-the-locked-.patch b/debian/patches-rt/fs-dcache-Delay-dentry-d_lock-outside-of-the-locked-.patch deleted file mode 100644 index 40922de28..000000000 --- a/debian/patches-rt/fs-dcache-Delay-dentry-d_lock-outside-of-the-locked-.patch +++ /dev/null @@ -1,149 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 13 May 2022 15:19:41 +0200 -Subject: [PATCH] fs/dcache: Delay dentry::d_lock outside of the locked - section. -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -__d_add() and __d_move() invoke __d_lookup_done() with disabled -preemption (due to the critical section started in start_dir_add()). -__d_lookup_done() additionally disables preemption by acquiring a bit -spinlock and then invokes wake_up_all(). 
- -This is problematic on PREEMPT_RT because wake_up_all() acquires -spinlock_t locks which must not be acquired with disabled preemption on -PREEMPT_RT. - -The wait_queue_head_t, which is assigned to dentry::d_wait, is always -provided by the caller of __d_lookup_done(). If d_alloc_parallel() -returns an allready existing dentry then dentry::d_wait does not belong -to the caller. In this case d_wait_lookup() ensured that the dentry is -no longer d_in_lookup() and so __d_lookup_done() is not invoked in order -to wake any possible waiter. - -The wake up could be delayed outside of the atomic section since it is -always owned by the __d_lookup_done() caller. It could even be moved -after dentry::d_lock has been released which is the lock, that the -waiter acquries after wake up. - -Rename __d_lookup_done() to __d_lookup_clear_d_wait() and make it return -dentry::d_wait and move the invocation of wake_up_all() to the caller. -Use this only internally dcache.c. Provide a __d_lookup_done() for -d_lookup_done() which performs the wake up. 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - fs/dcache.c | 27 +++++++++++++++++++++++---- - include/linux/dcache.h | 7 ++----- - 2 files changed, 25 insertions(+), 9 deletions(-) - ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -2709,18 +2709,30 @@ struct dentry *d_alloc_parallel(struct d - } - EXPORT_SYMBOL(d_alloc_parallel); - --void __d_lookup_done(struct dentry *dentry) -+static wait_queue_head_t *__d_lookup_clear_d_wait(struct dentry *dentry) - { -+ wait_queue_head_t *d_wait; - struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent, - dentry->d_name.hash); - hlist_bl_lock(b); - dentry->d_flags &= ~DCACHE_PAR_LOOKUP; - __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); -- wake_up_all(dentry->d_wait); -+ d_wait = dentry->d_wait; - dentry->d_wait = NULL; - hlist_bl_unlock(b); - INIT_HLIST_NODE(&dentry->d_u.d_alias); - INIT_LIST_HEAD(&dentry->d_lru); -+ return d_wait; -+} -+ -+void __d_lookup_done(struct dentry *dentry) -+{ -+ wait_queue_head_t *wq_head; -+ -+ spin_lock(&dentry->d_lock); -+ wq_head = __d_lookup_clear_d_wait(dentry); -+ spin_unlock(&dentry->d_lock); -+ wake_up_all(wq_head); - } - EXPORT_SYMBOL(__d_lookup_done); - -@@ -2728,13 +2740,15 @@ EXPORT_SYMBOL(__d_lookup_done); - - static inline void __d_add(struct dentry *dentry, struct inode *inode) - { -+ wait_queue_head_t *d_wait = NULL; - struct inode *dir = NULL; -+ - unsigned n; - spin_lock(&dentry->d_lock); - if (unlikely(d_in_lookup(dentry))) { - dir = dentry->d_parent->d_inode; - n = start_dir_add(dir); -- __d_lookup_done(dentry); -+ d_wait = __d_lookup_clear_d_wait(dentry); - } - if (inode) { - unsigned add_flags = d_flags_for_inode(inode); -@@ -2748,6 +2762,8 @@ static inline void __d_add(struct dentry - if (dir) - end_dir_add(dir, n); - spin_unlock(&dentry->d_lock); -+ if (d_wait) -+ wake_up_all(d_wait); - if (inode) - spin_unlock(&inode->i_lock); - } -@@ -2892,6 +2908,7 @@ static void copy_name(struct dentry *den - static void __d_move(struct dentry *dentry, struct 
dentry *target, - bool exchange) - { -+ wait_queue_head_t *d_wait = NULL; - struct dentry *old_parent, *p; - struct inode *dir = NULL; - unsigned n; -@@ -2923,7 +2940,7 @@ static void __d_move(struct dentry *dent - if (unlikely(d_in_lookup(target))) { - dir = target->d_parent->d_inode; - n = start_dir_add(dir); -- __d_lookup_done(target); -+ d_wait = __d_lookup_clear_d_wait(target); - } - - write_seqcount_begin(&dentry->d_seq); -@@ -2967,6 +2984,8 @@ static void __d_move(struct dentry *dent - spin_unlock(&old_parent->d_lock); - spin_unlock(&target->d_lock); - spin_unlock(&dentry->d_lock); -+ if (d_wait) -+ wake_up_all(d_wait); - } - - /* ---- a/include/linux/dcache.h -+++ b/include/linux/dcache.h -@@ -349,7 +349,7 @@ static inline void dont_mount(struct den - spin_unlock(&dentry->d_lock); - } - --extern void __d_lookup_done(struct dentry *); -+extern void __d_lookup_done(struct dentry *dentry); - - static inline int d_in_lookup(const struct dentry *dentry) - { -@@ -358,11 +358,8 @@ static inline int d_in_lookup(const stru - - static inline void d_lookup_done(struct dentry *dentry) - { -- if (unlikely(d_in_lookup(dentry))) { -- spin_lock(&dentry->d_lock); -+ if (unlikely(d_in_lookup(dentry))) - __d_lookup_done(dentry); -- spin_unlock(&dentry->d_lock); -- } - } - - extern void dput(struct dentry *); diff --git a/debian/patches-rt/fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch b/debian/patches-rt/fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch deleted file mode 100644 index 36617acb5..000000000 --- a/debian/patches-rt/fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch +++ /dev/null @@ -1,65 +0,0 @@ -Subject: fs/dcache: disable preemption on i_dir_seq's write side -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri Oct 20 11:29:53 2017 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> - -i_dir_seq is a 
sequence counter with a lock which is represented by the lowest -bit. The writer atomically updates the counter which ensures that it can be -modified by only one writer at a time. -The commit introducing this change claims that the lock has been integrated -into the counter for space reasons within the inode struct. The i_dir_seq -member is within a union which shares also a pointer. That means by using -seqlock_t we would have a sequence counter and a lock without increasing the -size of the data structure on 64bit and 32bit would grow by 4 bytes. With -lockdep enabled the size would grow and on PREEMPT_RT the spinlock_t is also -larger. - -In order to keep this construct working on PREEMPT_RT, the writer needs to -disable preemption while obtaining the lock on the sequence counter / starting -the write critical section. The writer acquires an otherwise unrelated -spinlock_t which serves the same purpose on !PREEMPT_RT. With enabled -preemption a high priority reader could preempt the writer and live lock the -system while waiting for the locked bit to disappear. - -Another solution would be to have global spinlock_t which is always acquired -by the writer. The reader would then acquire the lock if the sequence count is -odd and by doing so force the writer out of the critical section. The global -spinlock_t could be replaced by a hashed lock based on the address of the inode -to lower the lock contention. - -For now, manually disable preemption on PREEMPT_RT to avoid live locks. - -Reported-by: Oleg.Karfich@wago.com -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - fs/dcache.c | 10 +++++++++- - 1 file changed, 9 insertions(+), 1 deletion(-) ---- ---- a/fs/dcache.c -+++ b/fs/dcache.c -@@ -2563,7 +2563,13 @@ EXPORT_SYMBOL(d_rehash); - - static inline unsigned start_dir_add(struct inode *dir) - { -- -+ /* -+ * The caller has a spinlock_t (dentry::d_lock) acquired which disables -+ * preemption on !PREEMPT_RT. 
On PREEMPT_RT the lock does not disable -+ * preemption and it has be done explicitly. -+ */ -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ preempt_disable(); - for (;;) { - unsigned n = dir->i_dir_seq; - if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) -@@ -2575,6 +2581,8 @@ static inline unsigned start_dir_add(str - static inline void end_dir_add(struct inode *dir, unsigned n) - { - smp_store_release(&dir->i_dir_seq, n + 2); -+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) -+ preempt_enable(); - } - - static void d_wait_lookup(struct dentry *dentry) diff --git a/debian/patches-rt/generic-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch b/debian/patches-rt/generic-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch deleted file mode 100644 index 8d9b0effc..000000000 --- a/debian/patches-rt/generic-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch +++ /dev/null @@ -1,30 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 24 Sep 2021 17:05:48 +0200 -Subject: [PATCH] generic/softirq: Disable softirq stacks on PREEMPT_RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -PREEMPT_RT preempts softirqs and the current implementation avoids -do_softirq_own_stack() and only uses __do_softirq(). - -Disable the unused softirqs stacks on PREEMPT_RT to safe some memory and -ensure that do_softirq_own_stack() is not used which is not expected. - -[bigeasy: commit description.] 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - include/asm-generic/softirq_stack.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/include/asm-generic/softirq_stack.h -+++ b/include/asm-generic/softirq_stack.h -@@ -2,7 +2,7 @@ - #ifndef __ASM_GENERIC_SOFTIRQ_STACK_H - #define __ASM_GENERIC_SOFTIRQ_STACK_H - --#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK -+#if defined(CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK) && !defined(CONFIG_PREEMPT_RT) - void do_softirq_own_stack(void); - #else - static inline void do_softirq_own_stack(void) diff --git a/debian/patches-rt/genirq-Provide-generic_handle_domain_irq_safe.patch b/debian/patches-rt/genirq-Provide-generic_handle_domain_irq_safe.patch index 5f87f0053..d00dfcbe1 100644 --- a/debian/patches-rt/genirq-Provide-generic_handle_domain_irq_safe.patch +++ b/debian/patches-rt/genirq-Provide-generic_handle_domain_irq_safe.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Mon, 9 May 2022 16:04:08 +0200 Subject: [PATCH] genirq: Provide generic_handle_domain_irq_safe(). -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz Provide generic_handle_domain_irq_safe() which can used from any context. 
This similar to commit @@ -39,7 +39,7 @@ Link: https://lore.kernel.org/r/YnkfWFzvusFFktSt@linutronix.de --- a/drivers/bcma/driver_gpio.c +++ b/drivers/bcma/driver_gpio.c -@@ -113,7 +113,7 @@ static irqreturn_t bcma_gpio_irq_handler +@@ -115,7 +115,7 @@ static irqreturn_t bcma_gpio_irq_handler return IRQ_NONE; for_each_set_bit(gpio, &irqs, gc->ngpio) @@ -65,7 +65,7 @@ Link: https://lore.kernel.org/r/YnkfWFzvusFFktSt@linutronix.de } --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c -@@ -638,7 +638,7 @@ static bool do_amd_gpio_irq_handler(int +@@ -643,7 +643,7 @@ static bool do_amd_gpio_irq_handler(int if (!(regval & PIN_IRQ_PENDING) || !(regval & BIT(INTERRUPT_MASK_OFF))) continue; @@ -120,7 +120,7 @@ Link: https://lore.kernel.org/r/YnkfWFzvusFFktSt@linutronix.de --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c -@@ -706,6 +706,30 @@ int generic_handle_domain_irq(struct irq +@@ -705,6 +705,30 @@ int generic_handle_domain_irq(struct irq } EXPORT_SYMBOL_GPL(generic_handle_domain_irq); diff --git a/debian/patches-rt/genirq-irq_sim-Make-the-irq_work-always-run-in-hard-.patch b/debian/patches-rt/genirq-irq_sim-Make-the-irq_work-always-run-in-hard-.patch deleted file mode 100644 index cc566d13b..000000000 --- a/debian/patches-rt/genirq-irq_sim-Make-the-irq_work-always-run-in-hard-.patch +++ /dev/null @@ -1,38 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 11 May 2022 13:07:50 +0200 -Subject: [PATCH] genirq/irq_sim: Make the irq_work always run in hard irq - context. -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -The IRQ simulator uses irq_work to trigger an interrupt. Without the -IRQ_WORK_HARD_IRQ flag the irq_work will be performed in thread context -on PREEMPT_RT. 
This causes locking errors later in handle_simple_irq() -which expects to be invoked with disabled interrupts. - -Triggering individual interrupts in hardirq context should not lead to -unexpected high latencies since this is also what the hardware -controller does. Also it is used as a simulator so… - -Use IRQ_WORK_INIT_HARD() to carry out the irq_work in hardirq context on -PREEMPT_RT. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/YnuZBoEVMGwKkLm+@linutronix.de ---- - kernel/irq/irq_sim.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/kernel/irq/irq_sim.c -+++ b/kernel/irq/irq_sim.c -@@ -181,7 +181,7 @@ struct irq_domain *irq_domain_create_sim - goto err_free_bitmap; - - work_ctx->irq_count = num_irqs; -- init_irq_work(&work_ctx->work, irq_sim_handle_irq); -+ work_ctx->work = IRQ_WORK_INIT_HARD(irq_sim_handle_irq); - - return work_ctx->domain; - diff --git a/debian/patches-rt/iio-adc-stm32-adc-Use-generic_handle_domain_irq.patch b/debian/patches-rt/iio-adc-stm32-adc-Use-generic_handle_domain_irq.patch index 18e6c0221..aa55f2f1f 100644 --- a/debian/patches-rt/iio-adc-stm32-adc-Use-generic_handle_domain_irq.patch +++ b/debian/patches-rt/iio-adc-stm32-adc-Use-generic_handle_domain_irq.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Wed, 11 May 2022 13:06:09 +0200 Subject: [PATCH] iio: adc: stm32-adc: Use generic_handle_domain_irq() -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz The call chain generic_handle_irq(irq_find_mapping(domain, x)); diff --git a/debian/patches-rt/jump-label__disable_if_stop_machine_is_used.patch b/debian/patches-rt/jump-label__disable_if_stop_machine_is_used.patch deleted file mode 100644 index 803dbe221..000000000 --- a/debian/patches-rt/jump-label__disable_if_stop_machine_is_used.patch 
+++ /dev/null @@ -1,41 +0,0 @@ -Subject: jump-label: disable if stop_machine() is used -From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed Jul 8 17:14:48 2015 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -From: Thomas Gleixner <tglx@linutronix.de> - -Some architectures are using stop_machine() while switching the opcode which -leads to latency spikes. -The architectures which use stop_machine() atm: -- ARM stop machine -- s390 stop machine - -The architecures which use other sorcery: -- MIPS -- X86 -- powerpc -- sparc -- arm64 - -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -[bigeasy: only ARM for now] -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - - ---- - arch/arm/Kconfig | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) ---- ---- a/arch/arm/Kconfig -+++ b/arch/arm/Kconfig -@@ -71,7 +71,7 @@ config ARM - select HARDIRQS_SW_RESEND - select HAVE_ARCH_AUDITSYSCALL if AEABI && !OABI_COMPAT - select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 -- select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU -+ select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU && !PREEMPT_RT - select HAVE_ARCH_KFENCE if MMU && !XIP_KERNEL - select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU - select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL diff --git a/debian/patches-rt/lib-irq_poll-Prevent-softirq-pending-leak-in-irq_pol.patch b/debian/patches-rt/lib-irq_poll-Prevent-softirq-pending-leak-in-irq_pol.patch deleted file mode 100644 index c33238b46..000000000 --- a/debian/patches-rt/lib-irq_poll-Prevent-softirq-pending-leak-in-irq_pol.patch +++ /dev/null @@ -1,50 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Sun, 10 Apr 2022 14:49:36 +0200 -Subject: [PATCH] lib/irq_poll: Prevent softirq pending leak in - irq_poll_cpu_dead() -Origin: 
https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -irq_poll_cpu_dead() pulls the blk_cpu_iopoll backlog from the dead CPU and -raises the POLL softirq with __raise_softirq_irqoff() on the CPU it is -running on. That just sets the bit in the pending softirq mask. - -This means the handling of the softirq is delayed until the next interrupt -or a local_bh_disable/enable() pair. As a consequence the CPU on which this -code runs can reach idle with the POLL softirq pending, which triggers a -warning in the NOHZ idle code. - -Add a local_bh_disable/enable() pair around the interrupts disabled section -in irq_poll_cpu_dead(). local_bh_enable will handle the pending softirq. - -[tglx: Massaged changelog and comment] - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Link: https://lore.kernel.org/r/87k0bxgl27.ffs@tglx ---- - lib/irq_poll.c | 8 ++++++-- - 1 file changed, 6 insertions(+), 2 deletions(-) - ---- a/lib/irq_poll.c -+++ b/lib/irq_poll.c -@@ -188,14 +188,18 @@ EXPORT_SYMBOL(irq_poll_init); - static int irq_poll_cpu_dead(unsigned int cpu) - { - /* -- * If a CPU goes away, splice its entries to the current CPU -- * and trigger a run of the softirq -+ * If a CPU goes away, splice its entries to the current CPU and -+ * set the POLL softirq bit. The local_bh_disable()/enable() pair -+ * ensures that it is handled. Otherwise the current CPU could -+ * reach idle with the POLL softirq pending. 
- */ -+ local_bh_disable(); - local_irq_disable(); - list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), - this_cpu_ptr(&blk_cpu_iopoll)); - __raise_softirq_irqoff(IRQ_POLL_SOFTIRQ); - local_irq_enable(); -+ local_bh_enable(); - - return 0; - } diff --git a/debian/patches-rt/locking-lockdep-Remove-lockdep_init_map_crosslock.patch b/debian/patches-rt/locking-lockdep-Remove-lockdep_init_map_crosslock.patch index 838492fec..8a8e17623 100644 --- a/debian/patches-rt/locking-lockdep-Remove-lockdep_init_map_crosslock.patch +++ b/debian/patches-rt/locking-lockdep-Remove-lockdep_init_map_crosslock.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Fri, 11 Mar 2022 17:44:57 +0100 Subject: [PATCH] locking/lockdep: Remove lockdep_init_map_crosslock. -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz The cross-release bits have been removed, lockdep_init_map_crosslock() is a leftover. @@ -11,6 +11,7 @@ Remove lockdep_init_map_crosslock. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Reviewed-by: Waiman Long <longman@redhat.com> Link: https://lore.kernel.org/r/20220311164457.46461-1-bigeasy@linutronix.de +Link: https://lore.kernel.org/r/YqITgY+2aPITu96z@linutronix.de --- include/linux/lockdep.h | 1 - 1 file changed, 1 deletion(-) diff --git a/debian/patches-rt/mm-vmalloc-Use-raw_cpu_ptr-for-vmap_block_queue-acce.patch b/debian/patches-rt/mm-vmalloc-Use-raw_cpu_ptr-for-vmap_block_queue-acce.patch deleted file mode 100644 index 00c0923b1..000000000 --- a/debian/patches-rt/mm-vmalloc-Use-raw_cpu_ptr-for-vmap_block_queue-acce.patch +++ /dev/null @@ -1,59 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Wed, 4 May 2022 19:03:25 +0200 -Subject: [PATCH] mm/vmalloc: Use raw_cpu_ptr() for vmap_block_queue access. 
-Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -The per-CPU resource vmap_block_queue is accessed via the -get_cpu_var(). That macro disables preemption and then loads the pointer -from the current CPU. -This doesn't work on PREEMPT_RT because a spinlock_t is later accessed -within the preempt-disable section. - -There is no need to disable preemption while accessing the per-CPU -struct vmap_block_queue because the list is protected with a spinlock_t. -The per-CPU struct is also accessed cross-CPU in -purge_fragmented_blocks(). -It is possible that by using raw_cpu_ptr() the code migrates to another -CPU and uses struct from another CPU. This is fine because the list -locked and the locked section is very short. - -Use raw_cpu_ptr() to access vmap_block_queue. - -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Link: https://lore.kernel.org/r/YnKx3duAB53P7ojN@linutronix.de ---- - mm/vmalloc.c | 6 ++---- - 1 file changed, 2 insertions(+), 4 deletions(-) - ---- a/mm/vmalloc.c -+++ b/mm/vmalloc.c -@@ -1938,11 +1938,10 @@ static void *new_vmap_block(unsigned int - return ERR_PTR(err); - } - -- vbq = &get_cpu_var(vmap_block_queue); -+ vbq = raw_cpu_ptr(&vmap_block_queue); - spin_lock(&vbq->lock); - list_add_tail_rcu(&vb->free_list, &vbq->free); - spin_unlock(&vbq->lock); -- put_cpu_var(vmap_block_queue); - - return vaddr; - } -@@ -2021,7 +2020,7 @@ static void *vb_alloc(unsigned long size - order = get_order(size); - - rcu_read_lock(); -- vbq = &get_cpu_var(vmap_block_queue); -+ vbq = raw_cpu_ptr(&vmap_block_queue); - list_for_each_entry_rcu(vb, &vbq->free, free_list) { - unsigned long pages_off; - -@@ -2044,7 +2043,6 @@ static void *vb_alloc(unsigned long size - break; - } - -- put_cpu_var(vmap_block_queue); - rcu_read_unlock(); - - /* Allocate new block if nothing was found */ diff --git a/debian/patches-rt/powerpc__Add_support_for_lazy_preemption.patch 
b/debian/patches-rt/powerpc__Add_support_for_lazy_preemption.patch index 8be1b7e31..c48a8a745 100644 --- a/debian/patches-rt/powerpc__Add_support_for_lazy_preemption.patch +++ b/debian/patches-rt/powerpc__Add_support_for_lazy_preemption.patch @@ -1,7 +1,7 @@ Subject: powerpc: Add support for lazy preemption From: Thomas Gleixner <tglx@linutronix.de> Date: Thu Nov 1 10:14:11 2012 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Thomas Gleixner <tglx@linutronix.de> @@ -18,7 +18,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig -@@ -236,6 +236,7 @@ config PPC +@@ -240,6 +240,7 @@ config PPC select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP @@ -28,7 +28,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> select HAVE_RSEQ --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h -@@ -47,6 +47,8 @@ +@@ -53,6 +53,8 @@ struct thread_info { int preempt_count; /* 0 => preemptable, <0 => BUG */ @@ -37,7 +37,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_SMP unsigned int cpu; #endif -@@ -71,6 +73,7 @@ struct thread_info { +@@ -77,6 +79,7 @@ struct thread_info { #define INIT_THREAD_INFO(tsk) \ { \ .preempt_count = INIT_PREEMPT_COUNT, \ @@ -45,7 +45,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> .flags = 0, \ } -@@ -96,6 +99,7 @@ void arch_setup_new_exec(void); +@@ -102,6 +105,7 @@ void arch_setup_new_exec(void); #define TIF_PATCH_PENDING 6 /* pending live patching update */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SINGLESTEP 8 /* singlestepping active */ @@ -53,7 +53,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define TIF_SECCOMP 10 /* secure computing */ #define TIF_RESTOREALL 11 /* Restore all regs 
(implies NOERROR) */ #define TIF_NOERROR 12 /* Force successful syscall return */ -@@ -111,6 +115,7 @@ void arch_setup_new_exec(void); +@@ -117,6 +121,7 @@ void arch_setup_new_exec(void); #define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */ #define TIF_32BIT 20 /* 32 bit binary */ @@ -61,7 +61,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* as above, but as bit values */ #define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1<<TIF_SIGPENDING) -@@ -122,6 +127,7 @@ void arch_setup_new_exec(void); +@@ -128,6 +133,7 @@ void arch_setup_new_exec(void); #define _TIF_PATCH_PENDING (1<<TIF_PATCH_PENDING) #define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT) #define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP) @@ -69,7 +69,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #define _TIF_SECCOMP (1<<TIF_SECCOMP) #define _TIF_RESTOREALL (1<<TIF_RESTOREALL) #define _TIF_NOERROR (1<<TIF_NOERROR) -@@ -135,10 +141,12 @@ void arch_setup_new_exec(void); +@@ -141,10 +147,12 @@ void arch_setup_new_exec(void); _TIF_SYSCALL_EMU) #define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \ diff --git a/debian/patches-rt/powerpc__traps__Use_PREEMPT_RT.patch b/debian/patches-rt/powerpc__traps__Use_PREEMPT_RT.patch index 3e28b883a..3a78b1957 100644 --- a/debian/patches-rt/powerpc__traps__Use_PREEMPT_RT.patch +++ b/debian/patches-rt/powerpc__traps__Use_PREEMPT_RT.patch @@ -1,7 +1,7 @@ Subject: powerpc: traps: Use PREEMPT_RT From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Fri Jul 26 11:30:49 2019 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> diff --git a/debian/patches-rt/powerpc_kvm__Disable_in-kernel_MPIC_emulation_for_PREEMPT_RT.patch 
b/debian/patches-rt/powerpc_kvm__Disable_in-kernel_MPIC_emulation_for_PREEMPT_RT.patch index d42919284..551a6ac44 100644 --- a/debian/patches-rt/powerpc_kvm__Disable_in-kernel_MPIC_emulation_for_PREEMPT_RT.patch +++ b/debian/patches-rt/powerpc_kvm__Disable_in-kernel_MPIC_emulation_for_PREEMPT_RT.patch @@ -1,7 +1,7 @@ Subject: powerpc/kvm: Disable in-kernel MPIC emulation for PREEMPT_RT From: Bogdan Purcareata <bogdan.purcareata@freescale.com> Date: Fri Apr 24 15:53:13 2015 +0000 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Bogdan Purcareata <bogdan.purcareata@freescale.com> diff --git a/debian/patches-rt/powerpc_pseries_iommu__Use_a_locallock_instead_local_irq_save.patch b/debian/patches-rt/powerpc_pseries_iommu__Use_a_locallock_instead_local_irq_save.patch index 0d15a3c5a..a2a723d6f 100644 --- a/debian/patches-rt/powerpc_pseries_iommu__Use_a_locallock_instead_local_irq_save.patch +++ b/debian/patches-rt/powerpc_pseries_iommu__Use_a_locallock_instead_local_irq_save.patch @@ -1,7 +1,7 @@ Subject: powerpc/pseries/iommu: Use a locallock instead local_irq_save() From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Tue Mar 26 18:31:54 2019 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> diff --git a/debian/patches-rt/powerpc_stackprotector__work_around_stack-guard_init_from_atomic.patch b/debian/patches-rt/powerpc_stackprotector__work_around_stack-guard_init_from_atomic.patch index 2dc4857be..caa51d226 100644 --- a/debian/patches-rt/powerpc_stackprotector__work_around_stack-guard_init_from_atomic.patch +++ b/debian/patches-rt/powerpc_stackprotector__work_around_stack-guard_init_from_atomic.patch @@ 
-1,7 +1,7 @@ Subject: powerpc/stackprotector: work around stack-guard init from atomic From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Tue Mar 26 18:31:29 2019 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> diff --git a/debian/patches-rt/printk-reinstate-deferred-printing-with-kthreads.patch b/debian/patches-rt/printk-reinstate-deferred-printing-with-kthreads.patch new file mode 100644 index 000000000..8d6874c19 --- /dev/null +++ b/debian/patches-rt/printk-reinstate-deferred-printing-with-kthreads.patch @@ -0,0 +1,1239 @@ +From: Ben Hutchings <benh@debian.org> +Date: Wed, 29 Jun 2022 14:44:15 +0200 +Subject: printk: Reinstate deferred printing with kthreads +Forwarded: not-needed + +Several changes to printk were reverted in 5.19-rc4, which the +PREEMPT_RT patch set depended on. Reinstate those changes until there +is a new release of the patch set that resolves this properly. + +--- +diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c +index bbfd004449b5..2884cd638d64 100644 +--- a/drivers/tty/sysrq.c ++++ b/drivers/tty/sysrq.c +@@ -578,6 +578,7 @@ void __handle_sysrq(int key, bool check_mask) + + rcu_sysrq_start(); + rcu_read_lock(); ++ printk_prefer_direct_enter(); + /* + * Raise the apparent loglevel to maximum so that the sysrq header + * is shown to provide the user with positive feedback. 
We do not +@@ -619,6 +620,7 @@ void __handle_sysrq(int key, bool check_mask) + pr_cont("\n"); + console_loglevel = orig_log_level; + } ++ printk_prefer_direct_exit(); + rcu_read_unlock(); + rcu_sysrq_end(); + +diff --git a/include/linux/console.h b/include/linux/console.h +index 8c1686e2c233..143653090c48 100644 +--- a/include/linux/console.h ++++ b/include/linux/console.h +@@ -16,6 +16,7 @@ + + #include <linux/atomic.h> + #include <linux/types.h> ++#include <linux/mutex.h> + + struct vc_data; + struct console_font_op; +@@ -153,6 +154,22 @@ struct console { + uint ospeed; + u64 seq; + unsigned long dropped; ++ struct task_struct *thread; ++ bool blocked; ++ ++ /* ++ * The per-console lock is used by printing kthreads to synchronize ++ * this console with callers of console_lock(). This is necessary in ++ * order to allow printing kthreads to run in parallel to each other, ++ * while each safely accessing the @blocked field and synchronizing ++ * against direct printing via console_lock/console_unlock. ++ * ++ * Note: For synchronizing against direct printing via ++ * console_trylock/console_unlock, see the static global ++ * variable @console_kthreads_active. 
++ */ ++ struct mutex lock; ++ + void *data; + struct console *next; + }; +diff --git a/include/linux/printk.h b/include/linux/printk.h +index 091fba7283e1..c1e07c0652c7 100644 +--- a/include/linux/printk.h ++++ b/include/linux/printk.h +@@ -170,7 +170,11 @@ extern void __printk_safe_exit(void); + #define printk_deferred_enter __printk_safe_enter + #define printk_deferred_exit __printk_safe_exit + ++extern void printk_prefer_direct_enter(void); ++extern void printk_prefer_direct_exit(void); ++ + extern bool pr_flush(int timeout_ms, bool reset_on_progress); ++extern void try_block_console_kthreads(int timeout_ms); + + /* + * Please don't use printk_ratelimit(), because it shares ratelimiting state +@@ -222,11 +226,23 @@ static inline void printk_deferred_exit(void) + { + } + ++static inline void printk_prefer_direct_enter(void) ++{ ++} ++ ++static inline void printk_prefer_direct_exit(void) ++{ ++} ++ + static inline bool pr_flush(int timeout_ms, bool reset_on_progress) + { + return true; + } + ++static inline void try_block_console_kthreads(int timeout_ms) ++{ ++} ++ + static inline int printk_ratelimit(void) + { + return 0; +diff --git a/kernel/hung_task.c b/kernel/hung_task.c +index 52501e5f7655..02a65d554340 100644 +--- a/kernel/hung_task.c ++++ b/kernel/hung_task.c +@@ -127,6 +127,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) + * complain: + */ + if (sysctl_hung_task_warnings) { ++ printk_prefer_direct_enter(); ++ + if (sysctl_hung_task_warnings > 0) + sysctl_hung_task_warnings--; + pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", +@@ -142,6 +144,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) + + if (sysctl_hung_task_all_cpu_backtrace) + hung_task_show_all_bt = true; ++ ++ printk_prefer_direct_exit(); + } + + touch_nmi_watchdog(); +@@ -204,12 +208,17 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) + } + unlock: + rcu_read_unlock(); +- if (hung_task_show_lock) 
++ if (hung_task_show_lock) { ++ printk_prefer_direct_enter(); + debug_show_all_locks(); ++ printk_prefer_direct_exit(); ++ } + + if (hung_task_show_all_bt) { + hung_task_show_all_bt = false; ++ printk_prefer_direct_enter(); + trigger_all_cpu_backtrace(); ++ printk_prefer_direct_exit(); + } + + if (hung_task_call_panic) +diff --git a/kernel/panic.c b/kernel/panic.c +index 8355b19676f8..fe73d18ecdf0 100644 +--- a/kernel/panic.c ++++ b/kernel/panic.c +@@ -273,6 +273,7 @@ void panic(const char *fmt, ...) + * unfortunately means it may not be hardened to work in a + * panic situation. + */ ++ try_block_console_kthreads(10000); + smp_send_stop(); + } else { + /* +@@ -280,6 +281,7 @@ void panic(const char *fmt, ...) + * kmsg_dump, we will need architecture dependent extra + * works in addition to stopping other CPUs. + */ ++ try_block_console_kthreads(10000); + crash_smp_send_stop(); + } + +@@ -579,6 +581,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, + { + disable_trace_on_warning(); + ++ printk_prefer_direct_enter(); ++ + if (file) + pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n", + raw_smp_processor_id(), current->pid, file, line, +@@ -608,6 +612,8 @@ void __warn(const char *file, int line, void *caller, unsigned taint, + + /* Just a warning, don't kill lockdep. 
*/ + add_taint(taint, LOCKDEP_STILL_OK); ++ ++ printk_prefer_direct_exit(); + } + + #ifndef __WARN_FLAGS +diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h +index d947ca6c84f9..e7d8578860ad 100644 +--- a/kernel/printk/internal.h ++++ b/kernel/printk/internal.h +@@ -20,6 +20,8 @@ enum printk_info_flags { + LOG_CONT = 8, /* text is a fragment of a continuation line */ + }; + ++extern bool block_console_kthreads; ++ + __printf(4, 0) + int vprintk_store(int facility, int level, + const struct dev_printk_info *dev_info, +diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c +index b49c6ff6dca0..b095fb5f5f61 100644 +--- a/kernel/printk/printk.c ++++ b/kernel/printk/printk.c +@@ -223,6 +223,36 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, + /* Number of registered extended console drivers. */ + static int nr_ext_console_drivers; + ++/* ++ * Used to synchronize printing kthreads against direct printing via ++ * console_trylock/console_unlock. ++ * ++ * Values: ++ * -1 = console kthreads atomically blocked (via global trylock) ++ * 0 = no kthread printing, console not locked (via trylock) ++ * >0 = kthread(s) actively printing ++ * ++ * Note: For synchronizing against direct printing via ++ * console_lock/console_unlock, see the @lock variable in ++ * struct console. ++ */ ++static atomic_t console_kthreads_active = ATOMIC_INIT(0); ++ ++#define console_kthreads_atomic_tryblock() \ ++ (atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0) ++#define console_kthreads_atomic_unblock() \ ++ atomic_cmpxchg(&console_kthreads_active, -1, 0) ++#define console_kthreads_atomically_blocked() \ ++ (atomic_read(&console_kthreads_active) == -1) ++ ++#define console_kthread_printing_tryenter() \ ++ atomic_inc_unless_negative(&console_kthreads_active) ++#define console_kthread_printing_exit() \ ++ atomic_dec(&console_kthreads_active) ++ ++/* Block console kthreads to avoid processing new messages. 
*/ ++bool block_console_kthreads; ++ + /* + * Helper macros to handle lockdep when locking/unlocking console_sem. We use + * macros instead of functions so that _RET_IP_ contains useful information. +@@ -271,14 +301,49 @@ static bool panic_in_progress(void) + } + + /* +- * This is used for debugging the mess that is the VT code by +- * keeping track if we have the console semaphore held. It's +- * definitely not the perfect debug tool (we don't know if _WE_ +- * hold it and are racing, but it helps tracking those weird code +- * paths in the console code where we end up in places I want +- * locked without the console semaphore held). ++ * Tracks whether kthread printers are all blocked. A value of true implies ++ * that the console is locked via console_lock() or the console is suspended. ++ * Writing to this variable requires holding @console_sem. + */ +-static int console_locked, console_suspended; ++static bool console_kthreads_blocked; ++ ++/* ++ * Block all kthread printers from a schedulable context. ++ * ++ * Requires holding @console_sem. ++ */ ++static void console_kthreads_block(void) ++{ ++ struct console *con; ++ ++ for_each_console(con) { ++ mutex_lock(&con->lock); ++ con->blocked = true; ++ mutex_unlock(&con->lock); ++ } ++ ++ console_kthreads_blocked = true; ++} ++ ++/* ++ * Unblock all kthread printers from a schedulable context. ++ * ++ * Requires holding @console_sem. ++ */ ++static void console_kthreads_unblock(void) ++{ ++ struct console *con; ++ ++ for_each_console(con) { ++ mutex_lock(&con->lock); ++ con->blocked = false; ++ mutex_unlock(&con->lock); ++ } ++ ++ console_kthreads_blocked = false; ++} ++ ++static int console_suspended; + + /* + * Array of consoles built from command line options (console=) +@@ -361,7 +426,75 @@ static int console_msg_format = MSG_FORMAT_DEFAULT; + /* syslog_lock protects syslog_* variables and write access to clear_seq. 
*/ + static DEFINE_MUTEX(syslog_lock); + ++/* ++ * A flag to signify if printk_activate_kthreads() has already started the ++ * kthread printers. If true, any later registered consoles must start their ++ * own kthread directly. The flag is write protected by the console_lock. ++ */ ++static bool printk_kthreads_available; ++ + #ifdef CONFIG_PRINTK ++static atomic_t printk_prefer_direct = ATOMIC_INIT(0); ++ ++/** ++ * printk_prefer_direct_enter - cause printk() calls to attempt direct ++ * printing to all enabled consoles ++ * ++ * Since it is not possible to call into the console printing code from any ++ * context, there is no guarantee that direct printing will occur. ++ * ++ * This globally effects all printk() callers. ++ * ++ * Context: Any context. ++ */ ++void printk_prefer_direct_enter(void) ++{ ++ atomic_inc(&printk_prefer_direct); ++} ++ ++/** ++ * printk_prefer_direct_exit - restore printk() behavior ++ * ++ * Context: Any context. ++ */ ++void printk_prefer_direct_exit(void) ++{ ++ WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0); ++} ++ ++/* ++ * Calling printk() always wakes kthread printers so that they can ++ * flush the new message to their respective consoles. Also, if direct ++ * printing is allowed, printk() tries to flush the messages directly. ++ * ++ * Direct printing is allowed in situations when the kthreads ++ * are not available or the system is in a problematic state. ++ * ++ * See the implementation about possible races. ++ */ ++static inline bool allow_direct_printing(void) ++{ ++ /* ++ * Checking kthread availability is a possible race because the ++ * kthread printers can become permanently disabled during runtime. ++ * However, doing that requires holding the console_lock, so any ++ * pending messages will be direct printed by console_unlock(). ++ */ ++ if (!printk_kthreads_available) ++ return true; ++ ++ /* ++ * Prefer direct printing when the system is in a problematic state. 
++ * The context that sets this state will always see the updated value. ++ * The other contexts do not care. Anyway, direct printing is just a ++ * best effort. The direct output is only possible when console_lock ++ * is not already taken and no kthread printers are actively printing. ++ */ ++ return (system_state > SYSTEM_RUNNING || ++ oops_in_progress || ++ atomic_read(&printk_prefer_direct)); ++} ++ + DECLARE_WAIT_QUEUE_HEAD(log_wait); + /* All 3 protected by @syslog_lock. */ + /* the next printk record to read by syslog(READ) or /proc/kmsg */ +@@ -2252,10 +2385,10 @@ asmlinkage int vprintk_emit(int facility, int level, + printed_len = vprintk_store(facility, level, dev_info, fmt, args); + + /* If called from the scheduler, we can not call up(). */ +- if (!in_sched) { ++ if (!in_sched && allow_direct_printing()) { + /* + * The caller may be holding system-critical or +- * timing-sensitive locks. Disable preemption during ++ * timing-sensitive locks. Disable preemption during direct + * printing of all remaining records to all consoles so that + * this context can return as soon as possible. Hopefully + * another printk() caller will take over the printing. 
+@@ -2298,6 +2431,8 @@ EXPORT_SYMBOL(_printk); + + static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); + ++static void printk_start_kthread(struct console *con); ++ + #else /* CONFIG_PRINTK */ + + #define CONSOLE_LOG_MAX 0 +@@ -2331,6 +2466,8 @@ static void call_console_driver(struct console *con, const char *text, size_t le + } + static bool suppress_message_printing(int level) { return false; } + static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } ++static void printk_start_kthread(struct console *con) { } ++static bool allow_direct_printing(void) { return true; } + + #endif /* CONFIG_PRINTK */ + +@@ -2549,6 +2686,14 @@ static int console_cpu_notify(unsigned int cpu) + /* If trylock fails, someone else is doing the printing */ + if (console_trylock()) + console_unlock(); ++ else { ++ /* ++ * If a new CPU comes online, the conditions for ++ * printer_should_wake() may have changed for some ++ * kthread printer with !CON_ANYTIME. ++ */ ++ wake_up_klogd(); ++ } + } + return 0; + } +@@ -2568,7 +2713,7 @@ void console_lock(void) + down_console_sem(); + if (console_suspended) + return; +- console_locked = 1; ++ console_kthreads_block(); + console_may_schedule = 1; + } + EXPORT_SYMBOL(console_lock); +@@ -2589,15 +2734,30 @@ int console_trylock(void) + up_console_sem(); + return 0; + } +- console_locked = 1; ++ if (!console_kthreads_atomic_tryblock()) { ++ up_console_sem(); ++ return 0; ++ } + console_may_schedule = 0; + return 1; + } + EXPORT_SYMBOL(console_trylock); + ++/* ++ * This is used to help to make sure that certain paths within the VT code are ++ * running with the console lock held. 
It is definitely not the perfect debug ++ * tool (it is not known if the VT code is the task holding the console lock), ++ * but it helps tracking those weird code paths in the console code such as ++ * when the console is suspended: where the console is not locked but no ++ * console printing may occur. ++ * ++ * Note: This returns true when the console is suspended but is not locked. ++ * This is intentional because the VT code must consider that situation ++ * the same as if the console was locked. ++ */ + int is_console_locked(void) + { +- return console_locked; ++ return (console_kthreads_blocked || atomic_read(&console_kthreads_active)); + } + EXPORT_SYMBOL(is_console_locked); + +@@ -2620,18 +2780,9 @@ static bool abandon_console_lock_in_panic(void) + return atomic_read(&panic_cpu) != raw_smp_processor_id(); + } + +-/* +- * Check if the given console is currently capable and allowed to print +- * records. +- * +- * Requires the console_lock. +- */ +-static inline bool console_is_usable(struct console *con) ++static inline bool __console_is_usable(short flags) + { +- if (!(con->flags & CON_ENABLED)) +- return false; +- +- if (!con->write) ++ if (!(flags & CON_ENABLED)) + return false; + + /* +@@ -2640,15 +2791,43 @@ static inline bool console_is_usable(struct console *con) + * cope (CON_ANYTIME) don't call them until this CPU is officially up. + */ + if (!cpu_online(raw_smp_processor_id()) && +- !(con->flags & CON_ANYTIME)) ++ !(flags & CON_ANYTIME)) + return false; + + return true; + } + ++/* ++ * Check if the given console is currently capable and allowed to print ++ * records. ++ * ++ * Requires holding the console_lock. 
++ */ ++static inline bool console_is_usable(struct console *con) ++{ ++ if (!con->write) ++ return false; ++ ++ return __console_is_usable(con->flags); ++} ++ + static void __console_unlock(void) + { +- console_locked = 0; ++ /* ++ * Depending on whether console_lock() or console_trylock() was used, ++ * appropriately allow the kthread printers to continue. ++ */ ++ if (console_kthreads_blocked) ++ console_kthreads_unblock(); ++ else ++ console_kthreads_atomic_unblock(); ++ ++ /* ++ * New records may have arrived while the console was locked. ++ * Wake the kthread printers to print them. ++ */ ++ wake_up_klogd(); ++ + up_console_sem(); + } + +@@ -2666,17 +2845,19 @@ static void __console_unlock(void) + * + * @handover will be set to true if a printk waiter has taken over the + * console_lock, in which case the caller is no longer holding the +- * console_lock. Otherwise it is set to false. ++ * console_lock. Otherwise it is set to false. A NULL pointer may be provided ++ * to disable allowing the console_lock to be taken over by a printk waiter. + * + * Returns false if the given console has no next record to print, otherwise + * true. + * +- * Requires the console_lock. ++ * Requires the console_lock if @handover is non-NULL. ++ * Requires con->lock otherwise. 
+ */ +-static bool console_emit_next_record(struct console *con, char *text, char *ext_text, +- char *dropped_text, bool *handover) ++static bool __console_emit_next_record(struct console *con, char *text, char *ext_text, ++ char *dropped_text, bool *handover) + { +- static int panic_console_dropped; ++ static atomic_t panic_console_dropped = ATOMIC_INIT(0); + struct printk_info info; + struct printk_record r; + unsigned long flags; +@@ -2685,7 +2866,8 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ + + prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); + +- *handover = false; ++ if (handover) ++ *handover = false; + + if (!prb_read_valid(prb, con->seq, &r)) + return false; +@@ -2693,7 +2875,8 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ + if (con->seq != r.info->seq) { + con->dropped += r.info->seq - con->seq; + con->seq = r.info->seq; +- if (panic_in_progress() && panic_console_dropped++ > 10) { ++ if (panic_in_progress() && ++ atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) { + suppress_panic_printk = 1; + pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n"); + } +@@ -2715,31 +2898,61 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ + len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time); + } + +- /* +- * While actively printing out messages, if another printk() +- * were to occur on another CPU, it may wait for this one to +- * finish. This task can not be preempted if there is a +- * waiter waiting to take over. +- * +- * Interrupts are disabled because the hand over to a waiter +- * must not be interrupted until the hand over is completed +- * (@console_waiter is cleared). 
+- */ +- printk_safe_enter_irqsave(flags); +- console_lock_spinning_enable(); ++ if (handover) { ++ /* ++ * While actively printing out messages, if another printk() ++ * were to occur on another CPU, it may wait for this one to ++ * finish. This task can not be preempted if there is a ++ * waiter waiting to take over. ++ * ++ * Interrupts are disabled because the hand over to a waiter ++ * must not be interrupted until the hand over is completed ++ * (@console_waiter is cleared). ++ */ ++ printk_safe_enter_irqsave(flags); ++ console_lock_spinning_enable(); ++ ++ /* don't trace irqsoff print latency */ ++ stop_critical_timings(); ++ } + +- stop_critical_timings(); /* don't trace print latency */ + call_console_driver(con, write_text, len, dropped_text); +- start_critical_timings(); + + con->seq++; + +- *handover = console_lock_spinning_disable_and_check(); +- printk_safe_exit_irqrestore(flags); ++ if (handover) { ++ start_critical_timings(); ++ *handover = console_lock_spinning_disable_and_check(); ++ printk_safe_exit_irqrestore(flags); ++ } + skip: + return true; + } + ++/* ++ * Print a record for a given console, but allow another printk() caller to ++ * take over the console_lock and continue printing. ++ * ++ * Requires the console_lock, but depending on @handover after the call, the ++ * caller may no longer have the console_lock. ++ * ++ * See __console_emit_next_record() for argument and return details. ++ */ ++static bool console_emit_next_record_transferable(struct console *con, char *text, char *ext_text, ++ char *dropped_text, bool *handover) ++{ ++ /* ++ * Handovers are only supported if threaded printers are atomically ++ * blocked. The context taking over the console_lock may be atomic. ++ */ ++ if (!console_kthreads_atomically_blocked()) { ++ *handover = false; ++ handover = NULL; ++ } ++ ++ return __console_emit_next_record(con, text, ext_text, dropped_text, handover); ++} ++ + /* + * Print out all remaining records to all consoles. 
+ * +@@ -2758,8 +2971,8 @@ static bool console_emit_next_record(struct console *con, char *text, char *ext_ + * were flushed to all usable consoles. A returned false informs the caller + * that everything was not flushed (either there were no usable consoles or + * another context has taken over printing or it is a panic situation and this +- * is not the panic CPU). Regardless the reason, the caller should assume it +- * is not useful to immediately try again. ++ * is not the panic CPU or direct printing is not preferred). Regardless the ++ * reason, the caller should assume it is not useful to immediately try again. + * + * Requires the console_lock. + */ +@@ -2776,6 +2989,10 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + *handover = false; + + do { ++ /* Let the kthread printers do the work if they can. */ ++ if (!allow_direct_printing()) ++ return false; ++ + any_progress = false; + + for_each_console(con) { +@@ -2787,13 +3004,11 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove + + if (con->flags & CON_EXTENDED) { + /* Extended consoles do not print "dropped messages". 
*/ +- progress = console_emit_next_record(con, &text[0], +- &ext_text[0], NULL, +- handover); ++ progress = console_emit_next_record_transferable(con, &text[0], ++ &ext_text[0], NULL, handover); + } else { +- progress = console_emit_next_record(con, &text[0], +- NULL, &dropped_text[0], +- handover); ++ progress = console_emit_next_record_transferable(con, &text[0], ++ NULL, &dropped_text[0], handover); + } + if (*handover) + return false; +@@ -2908,10 +3123,13 @@ void console_unblank(void) + if (oops_in_progress) { + if (down_trylock_console_sem() != 0) + return; ++ if (!console_kthreads_atomic_tryblock()) { ++ up_console_sem(); ++ return; ++ } + } else + console_lock(); + +- console_locked = 1; + console_may_schedule = 0; + for_each_console(c) + if ((c->flags & CON_ENABLED) && c->unblank) +@@ -3190,6 +3408,10 @@ void register_console(struct console *newcon) + nr_ext_console_drivers++; + + newcon->dropped = 0; ++ newcon->thread = NULL; ++ newcon->blocked = true; ++ mutex_init(&newcon->lock); ++ + if (newcon->flags & CON_PRINTBUFFER) { + /* Get a consistent copy of @syslog_seq. */ + mutex_lock(&syslog_lock); +@@ -3199,6 +3421,10 @@ void register_console(struct console *newcon) + /* Begin with next message. */ + newcon->seq = prb_next_seq(prb); + } ++ ++ if (printk_kthreads_available) ++ printk_start_kthread(newcon); ++ + console_unlock(); + console_sysfs_notify(); + +@@ -3225,6 +3451,7 @@ EXPORT_SYMBOL(register_console); + + int unregister_console(struct console *console) + { ++ struct task_struct *thd; + struct console *con; + int res; + +@@ -3265,7 +3492,20 @@ int unregister_console(struct console *console) + console_drivers->flags |= CON_CONSDEV; + + console->flags &= ~CON_ENABLED; ++ ++ /* ++ * console->thread can only be cleared under the console lock. But ++ * stopping the thread must be done without the console lock. The ++ * task that clears @thread is the task that stops the kthread. 
++ */ ++ thd = console->thread; ++ console->thread = NULL; ++ + console_unlock(); ++ ++ if (thd) ++ kthread_stop(thd); ++ + console_sysfs_notify(); + + if (console->exit) +@@ -3361,6 +3601,20 @@ static int __init printk_late_init(void) + } + late_initcall(printk_late_init); + ++static int __init printk_activate_kthreads(void) ++{ ++ struct console *con; ++ ++ console_lock(); ++ printk_kthreads_available = true; ++ for_each_console(con) ++ printk_start_kthread(con); ++ console_unlock(); ++ ++ return 0; ++} ++early_initcall(printk_activate_kthreads); ++ + #if defined CONFIG_PRINTK + /* If @con is specified, only wait for that console. Otherwise wait for all. */ + static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) +@@ -3435,11 +3689,209 @@ bool pr_flush(int timeout_ms, bool reset_on_progress) + } + EXPORT_SYMBOL(pr_flush); + ++static void __printk_fallback_preferred_direct(void) ++{ ++ printk_prefer_direct_enter(); ++ pr_err("falling back to preferred direct printing\n"); ++ printk_kthreads_available = false; ++} ++ ++/* ++ * Enter preferred direct printing, but never exit. Mark console threads as ++ * unavailable. The system is then forever in preferred direct printing and ++ * any printing threads will exit. ++ * ++ * Must *not* be called under console_lock. Use ++ * __printk_fallback_preferred_direct() if already holding console_lock. ++ */ ++static void printk_fallback_preferred_direct(void) ++{ ++ console_lock(); ++ __printk_fallback_preferred_direct(); ++ console_unlock(); ++} ++ ++/* ++ * Print a record for a given console, not allowing another printk() caller ++ * to take over. This is appropriate for contexts that do not have the ++ * console_lock. ++ * ++ * See __console_emit_next_record() for argument and return details. 
++ */ ++static bool console_emit_next_record(struct console *con, char *text, char *ext_text, ++ char *dropped_text) ++{ ++ return __console_emit_next_record(con, text, ext_text, dropped_text, NULL); ++} ++ ++static bool printer_should_wake(struct console *con, u64 seq) ++{ ++ short flags; ++ ++ if (kthread_should_stop() || !printk_kthreads_available) ++ return true; ++ ++ if (con->blocked || ++ console_kthreads_atomically_blocked() || ++ block_console_kthreads || ++ system_state > SYSTEM_RUNNING || ++ oops_in_progress) { ++ return false; ++ } ++ ++ /* ++ * This is an unsafe read from con->flags, but a false positive is ++ * not a problem. Worst case it would allow the printer to wake up ++ * although it is disabled. But the printer will notice that when ++ * attempting to print and instead go back to sleep. ++ */ ++ flags = data_race(READ_ONCE(con->flags)); ++ ++ if (!__console_is_usable(flags)) ++ return false; ++ ++ return prb_read_valid(prb, seq, NULL); ++} ++ ++static int printk_kthread_func(void *data) ++{ ++ struct console *con = data; ++ char *dropped_text = NULL; ++ char *ext_text = NULL; ++ u64 seq = 0; ++ char *text; ++ int error; ++ ++ text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); ++ if (!text) { ++ con_printk(KERN_ERR, con, "failed to allocate text buffer\n"); ++ printk_fallback_preferred_direct(); ++ goto out; ++ } ++ ++ if (con->flags & CON_EXTENDED) { ++ ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); ++ if (!ext_text) { ++ con_printk(KERN_ERR, con, "failed to allocate ext_text buffer\n"); ++ printk_fallback_preferred_direct(); ++ goto out; ++ } ++ } else { ++ dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL); ++ if (!dropped_text) { ++ con_printk(KERN_ERR, con, "failed to allocate dropped_text buffer\n"); ++ printk_fallback_preferred_direct(); ++ goto out; ++ } ++ } ++ ++ con_printk(KERN_INFO, con, "printing thread started\n"); ++ ++ for (;;) { ++ /* ++ * Guarantee this task is visible on the waitqueue before ++ * checking the wake 
condition. ++ * ++ * The full memory barrier within set_current_state() of ++ * prepare_to_wait_event() pairs with the full memory barrier ++ * within wq_has_sleeper(). ++ * ++ * This pairs with __wake_up_klogd:A. ++ */ ++ error = wait_event_interruptible(log_wait, ++ printer_should_wake(con, seq)); /* LMM(printk_kthread_func:A) */ ++ ++ if (kthread_should_stop() || !printk_kthreads_available) ++ break; ++ ++ if (error) ++ continue; ++ ++ error = mutex_lock_interruptible(&con->lock); ++ if (error) ++ continue; ++ ++ if (con->blocked || ++ !console_kthread_printing_tryenter()) { ++ /* Another context has locked the console_lock. */ ++ mutex_unlock(&con->lock); ++ continue; ++ } ++ ++ /* ++ * Although this context has not locked the console_lock, it ++ * is known that the console_lock is not locked and it is not ++ * possible for any other context to lock the console_lock. ++ * Therefore it is safe to read con->flags. ++ */ ++ ++ if (!__console_is_usable(con->flags)) { ++ console_kthread_printing_exit(); ++ mutex_unlock(&con->lock); ++ continue; ++ } ++ ++ /* ++ * Even though the printk kthread is always preemptible, it is ++ * still not allowed to call cond_resched() from within ++ * console drivers. The task may become non-preemptible in the ++ * console driver call chain. For example, vt_console_print() ++ * takes a spinlock and then can call into fbcon_redraw(), ++ * which can conditionally invoke cond_resched(). ++ */ ++ console_may_schedule = 0; ++ console_emit_next_record(con, text, ext_text, dropped_text); ++ ++ seq = con->seq; ++ ++ console_kthread_printing_exit(); ++ ++ mutex_unlock(&con->lock); ++ } ++ ++ con_printk(KERN_INFO, con, "printing thread stopped\n"); ++out: ++ kfree(dropped_text); ++ kfree(ext_text); ++ kfree(text); ++ ++ console_lock(); ++ /* ++ * If this kthread is being stopped by another task, con->thread will ++ * already be NULL. That is fine. The important thing is that it is ++ * NULL after the kthread exits. 
++ */ ++ con->thread = NULL; ++ console_unlock(); ++ ++ return 0; ++} ++ ++/* Must be called under console_lock. */ ++static void printk_start_kthread(struct console *con) ++{ ++ /* ++ * Do not start a kthread if there is no write() callback. The ++ * kthreads assume the write() callback exists. ++ */ ++ if (!con->write) ++ return; ++ ++ con->thread = kthread_run(printk_kthread_func, con, ++ "pr/%s%d", con->name, con->index); ++ if (IS_ERR(con->thread)) { ++ con->thread = NULL; ++ con_printk(KERN_ERR, con, "unable to start printing thread\n"); ++ __printk_fallback_preferred_direct(); ++ return; ++ } ++} ++ + /* + * Delayed printk version, for scheduler-internal messages: + */ +-#define PRINTK_PENDING_WAKEUP 0x01 +-#define PRINTK_PENDING_OUTPUT 0x02 ++#define PRINTK_PENDING_WAKEUP 0x01 ++#define PRINTK_PENDING_DIRECT_OUTPUT 0x02 + + static DEFINE_PER_CPU(int, printk_pending); + +@@ -3447,10 +3899,14 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work) + { + int pending = this_cpu_xchg(printk_pending, 0); + +- if (pending & PRINTK_PENDING_OUTPUT) { ++ if (pending & PRINTK_PENDING_DIRECT_OUTPUT) { ++ printk_prefer_direct_enter(); ++ + /* If trylock fails, someone else is doing the printing */ + if (console_trylock()) + console_unlock(); ++ ++ printk_prefer_direct_exit(); + } + + if (pending & PRINTK_PENDING_WAKEUP) +@@ -3475,10 +3931,11 @@ static void __wake_up_klogd(int val) + * prepare_to_wait_event(), which is called after ___wait_event() adds + * the waiter but before it has checked the wait condition. + * +- * This pairs with devkmsg_read:A and syslog_print:A. ++ * This pairs with devkmsg_read:A, syslog_print:A, and ++ * printk_kthread_func:A. 
+ */
+ if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */
+- (val & PRINTK_PENDING_OUTPUT)) {
++ (val & PRINTK_PENDING_DIRECT_OUTPUT)) {
+ this_cpu_or(printk_pending, val);
+ irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
+ }
+@@ -3496,7 +3953,17 @@ void defer_console_output(void)
+ * New messages may have been added directly to the ringbuffer
+ * using vprintk_store(), so wake any waiters as well.
+ */
+- __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT);
++ int val = PRINTK_PENDING_WAKEUP;
++
++ /*
++ * Make sure that some context will print the messages when direct
++ * printing is allowed. This happens in situations when the kthreads
++ * may not be as reliable or perhaps unusable.
++ */
++ if (allow_direct_printing())
++ val |= PRINTK_PENDING_DIRECT_OUTPUT;
++
++ __wake_up_klogd(val);
+ }
+
+ void printk_trigger_flush(void)
+diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
+index ef0f9a2044da..caac4de1ea59 100644
+--- a/kernel/printk/printk_safe.c
++++ b/kernel/printk/printk_safe.c
+@@ -8,7 +8,9 @@
+ #include <linux/smp.h>
+ #include <linux/cpumask.h>
+ #include <linux/printk.h>
++#include <linux/console.h>
+ #include <linux/kprobes.h>
++#include <linux/delay.h>
+
+ #include "internal.h"
+
+@@ -50,3 +52,33 @@ asmlinkage int vprintk(const char *fmt, va_list args)
+ return vprintk_default(fmt, args);
+ }
+ EXPORT_SYMBOL(vprintk);
++
++/**
++ * try_block_console_kthreads() - Try to block console kthreads and
++ * make the global console_lock() available
++ *
++ * @timeout_ms: The maximum time (in ms) to wait.
++ *
++ * Prevent console kthreads from starting processing new messages. Wait
++ * until the global console_lock() becomes available.
++ *
++ * Context: Can be called in any context.
++ */
++void try_block_console_kthreads(int timeout_ms)
++{
++ block_console_kthreads = true;
++
++ /* Do not wait when the console lock could not be safely taken.
*/ ++ if (this_cpu_read(printk_context) || in_nmi()) ++ return; ++ ++ while (timeout_ms > 0) { ++ if (console_trylock()) { ++ console_unlock(); ++ return; ++ } ++ ++ udelay(1000); ++ timeout_ms -= 1; ++ } ++} +diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h +index a001e1e7a992..4995c078cff9 100644 +--- a/kernel/rcu/tree_stall.h ++++ b/kernel/rcu/tree_stall.h +@@ -647,6 +647,7 @@ static void print_cpu_stall(unsigned long gps) + * See Documentation/RCU/stallwarn.rst for info on how to debug + * RCU CPU stall warnings. + */ ++ printk_prefer_direct_enter(); + trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected")); + pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name); + raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); +@@ -684,6 +685,7 @@ static void print_cpu_stall(unsigned long gps) + */ + set_tsk_need_resched(current); + set_preempt_need_resched(); ++ printk_prefer_direct_exit(); + } + + static void check_cpu_stall(struct rcu_data *rdp) +diff --git a/kernel/reboot.c b/kernel/reboot.c +index 6bcc5d6a6572..310363685502 100644 +--- a/kernel/reboot.c ++++ b/kernel/reboot.c +@@ -74,6 +74,7 @@ void kernel_restart_prepare(char *cmd) + { + blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); + system_state = SYSTEM_RESTART; ++ try_block_console_kthreads(10000); + usermodehelper_disable(); + device_shutdown(); + } +@@ -262,6 +263,7 @@ static void kernel_shutdown_prepare(enum system_states state) + blocking_notifier_call_chain(&reboot_notifier_list, + (state == SYSTEM_HALT) ? 
SYS_HALT : SYS_POWER_OFF, NULL); + system_state = state; ++ try_block_console_kthreads(10000); + usermodehelper_disable(); + device_shutdown(); + } +@@ -447,9 +449,11 @@ static int __orderly_reboot(void) + ret = run_cmd(reboot_cmd); + + if (ret) { ++ printk_prefer_direct_enter(); + pr_warn("Failed to start orderly reboot: forcing the issue\n"); + emergency_sync(); + kernel_restart(NULL); ++ printk_prefer_direct_exit(); + } + + return ret; +@@ -462,6 +466,7 @@ static int __orderly_poweroff(bool force) + ret = run_cmd(poweroff_cmd); + + if (ret && force) { ++ printk_prefer_direct_enter(); + pr_warn("Failed to start orderly shutdown: forcing the issue\n"); + + /* +@@ -471,6 +476,7 @@ static int __orderly_poweroff(bool force) + */ + emergency_sync(); + kernel_power_off(); ++ printk_prefer_direct_exit(); + } + + return ret; +@@ -528,6 +534,8 @@ EXPORT_SYMBOL_GPL(orderly_reboot); + */ + static void hw_failure_emergency_poweroff_func(struct work_struct *work) + { ++ printk_prefer_direct_enter(); ++ + /* + * We have reached here after the emergency shutdown waiting period has + * expired. This means orderly_poweroff has not been able to shut off +@@ -544,6 +552,8 @@ static void hw_failure_emergency_poweroff_func(struct work_struct *work) + */ + pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n"); + emergency_restart(); ++ ++ printk_prefer_direct_exit(); + } + + static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work, +@@ -582,11 +592,13 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) + { + static atomic_t allow_proceed = ATOMIC_INIT(1); + ++ printk_prefer_direct_enter(); ++ + pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason); + + /* Shutdown should be initiated only once. 
*/ + if (!atomic_dec_and_test(&allow_proceed)) +- return; ++ goto out; + + /* + * Queue a backup emergency shutdown in the event of +@@ -594,6 +606,8 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced) + */ + hw_failure_emergency_poweroff(ms_until_forced); + orderly_poweroff(true); ++out: ++ printk_prefer_direct_exit(); + } + EXPORT_SYMBOL_GPL(hw_protection_shutdown); + +diff --git a/kernel/watchdog.c b/kernel/watchdog.c +index 9166220457bc..40024e03d422 100644 +--- a/kernel/watchdog.c ++++ b/kernel/watchdog.c +@@ -424,6 +424,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) + /* Start period for the next softlockup warning. */ + update_report_ts(); + ++ printk_prefer_direct_enter(); ++ + pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", + smp_processor_id(), duration, + current->comm, task_pid_nr(current)); +@@ -442,6 +444,8 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) + add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); + if (softlockup_panic) + panic("softlockup: hung tasks"); ++ ++ printk_prefer_direct_exit(); + } + + return HRTIMER_RESTART; +diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c +index 247bf0b1582c..701f35f0e2d4 100644 +--- a/kernel/watchdog_hld.c ++++ b/kernel/watchdog_hld.c +@@ -135,6 +135,8 @@ static void watchdog_overflow_callback(struct perf_event *event, + if (__this_cpu_read(hard_watchdog_warn) == true) + return; + ++ printk_prefer_direct_enter(); ++ + pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n", + this_cpu); + print_modules(); +@@ -155,6 +157,8 @@ static void watchdog_overflow_callback(struct perf_event *event, + if (hardlockup_panic) + nmi_panic(regs, "Hard LOCKUP"); + ++ printk_prefer_direct_exit(); ++ + __this_cpu_write(hard_watchdog_warn, true); + return; + } diff --git a/debian/patches-rt/rcu-tasks-Use-rcuwait-for-the-rcu_tasks_kthread.patch b/debian/patches-rt/rcu-tasks-Use-rcuwait-for-the-rcu_tasks_kthread.patch deleted file mode 
100644 index 4eb8cbaeb..000000000 --- a/debian/patches-rt/rcu-tasks-Use-rcuwait-for-the-rcu_tasks_kthread.patch +++ /dev/null @@ -1,77 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Fri, 4 Mar 2022 09:22:46 +0100 -Subject: [PATCH] rcu-tasks: Use rcuwait for the rcu_tasks_kthread(). -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -The waitqueue used by rcu_tasks_kthread() has always only one waiter. -With a guaranteed only one waiter, this can be replaced with rcuwait -which is smaller and simpler. With rcuwait based wake counterpart, the -irqwork function (call_rcu_tasks_iw_wakeup()) can be invoked hardirq -context because it is only a wake up and no sleeping locks are involved -(unlike the wait_queue_head). -As a side effect, this is also one piece of the puzzle to pass the RCU -selftest at early boot on PREEMPT_RT. - -Replace wait_queue_head with rcuwait and let the irqwork run in hardirq -context on PREEMPT_RT. - -Link: https://lkml.kernel.org/r/YiHy7Y5fTU3jRdMi@linutronix.de -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/rcu/tasks.h | 14 ++++++++------ - 1 file changed, 8 insertions(+), 6 deletions(-) - ---- a/kernel/rcu/tasks.h -+++ b/kernel/rcu/tasks.h -@@ -46,7 +46,7 @@ struct rcu_tasks_percpu { - - /** - * struct rcu_tasks - Definition for a Tasks-RCU-like mechanism. -- * @cbs_wq: Wait queue allowing new callback to get kthread's attention. -+ * @cbs_wait: RCU wait allowing a new callback to get kthread's attention. - * @cbs_gbl_lock: Lock protecting callback list. - * @kthread_ptr: This flavor's grace-period/callback-invocation kthread. - * @gp_func: This flavor's grace-period-wait function. -@@ -77,7 +77,7 @@ struct rcu_tasks_percpu { - * @kname: This flavor's kthread name. 
- */ - struct rcu_tasks { -- struct wait_queue_head cbs_wq; -+ struct rcuwait cbs_wait; - raw_spinlock_t cbs_gbl_lock; - int gp_state; - int gp_sleep; -@@ -113,11 +113,11 @@ static void call_rcu_tasks_iw_wakeup(str - #define DEFINE_RCU_TASKS(rt_name, gp, call, n) \ - static DEFINE_PER_CPU(struct rcu_tasks_percpu, rt_name ## __percpu) = { \ - .lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name ## __percpu.cbs_pcpu_lock), \ -- .rtp_irq_work = IRQ_WORK_INIT(call_rcu_tasks_iw_wakeup), \ -+ .rtp_irq_work = IRQ_WORK_INIT_HARD(call_rcu_tasks_iw_wakeup), \ - }; \ - static struct rcu_tasks rt_name = \ - { \ -- .cbs_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rt_name.cbs_wq), \ -+ .cbs_wait = __RCUWAIT_INITIALIZER(rt_name.wait), \ - .cbs_gbl_lock = __RAW_SPIN_LOCK_UNLOCKED(rt_name.cbs_gbl_lock), \ - .gp_func = gp, \ - .call_func = call, \ -@@ -261,7 +261,7 @@ static void call_rcu_tasks_iw_wakeup(str - struct rcu_tasks_percpu *rtpcp = container_of(iwp, struct rcu_tasks_percpu, rtp_irq_work); - - rtp = rtpcp->rtpp; -- wake_up(&rtp->cbs_wq); -+ rcuwait_wake_up(&rtp->cbs_wait); - } - - // Enqueue a callback for the specified flavor of Tasks RCU. -@@ -509,7 +509,9 @@ static int __noreturn rcu_tasks_kthread( - set_tasks_gp_state(rtp, RTGS_WAIT_CBS); - - /* If there were none, wait a bit and start over. */ -- wait_event_idle(rtp->cbs_wq, (needgpcb = rcu_tasks_need_gpcb(rtp))); -+ rcuwait_wait_event(&rtp->cbs_wait, -+ (needgpcb = rcu_tasks_need_gpcb(rtp)), -+ TASK_IDLE); - - if (needgpcb & 0x2) { - // Wait for one grace period. 
diff --git a/debian/patches-rt/rcu-tasks-Use-schedule_hrtimeout_range-while-waiting.patch b/debian/patches-rt/rcu-tasks-Use-schedule_hrtimeout_range-while-waiting.patch deleted file mode 100644 index 282d307d2..000000000 --- a/debian/patches-rt/rcu-tasks-Use-schedule_hrtimeout_range-while-waiting.patch +++ /dev/null @@ -1,43 +0,0 @@ -From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Mon, 7 Mar 2022 17:08:23 +0100 -Subject: [PATCH] rcu-tasks: Use schedule_hrtimeout_range() while waiting for - the gp. -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -The RCU selftest is using schedule_timeout_idle() which fails on -PREEMPT_RT because it is used early in boot-up phase an which point -ksoftirqd is not yet ready and is required for the timer to expire. - -To avoid this lockup, use schedule_hrtimeout() and let the timer expire -in hardirq context. This is ensures that the timer fires even on -PREEMPT_RT without any further requirement. - -The timer is set to expire between fract and fract + HZ / 2 jiffies in -order to minimize the amount of extra wake ups and to allign with -possible other timer which expire within this window. 
- -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - kernel/rcu/tasks.h | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - ---- a/kernel/rcu/tasks.h -+++ b/kernel/rcu/tasks.h -@@ -630,12 +630,15 @@ static void rcu_tasks_wait_gp(struct rcu - while (!list_empty(&holdouts)) { - bool firstreport; - bool needreport; -+ ktime_t exp; - int rtst; - - /* Slowly back off waiting for holdouts */ - set_tasks_gp_state(rtp, RTGS_WAIT_SCAN_HOLDOUTS); -- schedule_timeout_idle(fract); -- -+ exp = jiffies_to_nsecs(fract); -+ __set_current_state(TASK_IDLE); -+ schedule_hrtimeout_range(&exp, jiffies_to_nsecs(HZ / 2), -+ HRTIMER_MODE_REL_HARD); - if (fract < HZ) - fract++; - diff --git a/debian/patches-rt/rcutorture-Also-force-sched-priority-to-timersd-on-b.patch b/debian/patches-rt/rcutorture-Also-force-sched-priority-to-timersd-on-b.patch index 94c5b82f0..f71b38df8 100644 --- a/debian/patches-rt/rcutorture-Also-force-sched-priority-to-timersd-on-b.patch +++ b/debian/patches-rt/rcutorture-Also-force-sched-priority-to-timersd-on-b.patch @@ -2,7 +2,7 @@ From: Frederic Weisbecker <frederic@kernel.org> Date: Tue, 5 Apr 2022 03:07:51 +0200 Subject: [PATCH] rcutorture: Also force sched priority to timersd on boosting test. 
-Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz ksoftirqd is statically boosted to the priority level right above the one of rcu_torture_boost() so that timers, which torture readers rely on, @@ -35,7 +35,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -624,6 +624,7 @@ extern void raise_softirq_irqoff(unsigne +@@ -606,6 +606,7 @@ extern void raise_softirq_irqoff(unsigne extern void raise_softirq(unsigned int nr); #ifdef CONFIG_PREEMPT_RT @@ -45,7 +45,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c -@@ -3294,6 +3294,12 @@ rcu_torture_init(void) +@@ -3342,6 +3342,12 @@ rcu_torture_init(void) WARN_ON_ONCE(!t); sp.sched_priority = 2; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); diff --git a/debian/patches-rt/sched-Consider-task_struct-saved_state-in-wait_task_.patch b/debian/patches-rt/sched-Consider-task_struct-saved_state-in-wait_task_.patch index c75ff7e5f..c25821bef 100644 --- a/debian/patches-rt/sched-Consider-task_struct-saved_state-in-wait_task_.patch +++ b/debian/patches-rt/sched-Consider-task_struct-saved_state-in-wait_task_.patch @@ -1,24 +1,31 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> -Date: Mon, 2 May 2022 13:58:03 +0200 +Date: Wed, 22 Jun 2022 12:27:05 +0200 Subject: [PATCH] sched: Consider task_struct::saved_state in wait_task_inactive(). -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz Ptrace is using wait_task_inactive() to wait for the tracee to reach a certain task state. 
On PREEMPT_RT that state may be stored in -task_struct::saved_state while the tracee blocks on a sleeping lock. +task_struct::saved_state while the tracee blocks on a sleeping lock and +task_struct::__state is set to TASK_RTLOCK_WAIT. +It is not possible to check only for TASK_RTLOCK_WAIT to be sure that the task +is blocked on a sleeping lock because during wake up (after the sleeping lock +has been acquired) the task state is set TASK_RUNNING. After the task in on CPU +and acquired the pi_lock it will reset the state accordingly but until then +TASK_RUNNING will be observed (with the desired state saved in saved_state). -In that case wait_task_inactive() should wait until the requested state -is in task_struct::__state and the task idle. +Check also for task_struct::saved_state if the desired match was not found in +task_struct::__state on PREEMPT_RT. If the state was found in saved_state, wait +until the task is idle and state is visible in task_struct::__state. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- - kernel/sched/core.c | 38 ++++++++++++++++++++++++++++++++++---- - 1 file changed, 34 insertions(+), 4 deletions(-) + kernel/sched/core.c | 40 ++++++++++++++++++++++++++++++++++++---- + 1 file changed, 36 insertions(+), 4 deletions(-) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -3285,6 +3285,8 @@ unsigned long wait_task_inactive(struct +@@ -3279,6 +3279,8 @@ unsigned long wait_task_inactive(struct struct rq_flags rf; unsigned long ncsw; struct rq *rq; @@ -27,32 +34,34 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> for (;;) { /* -@@ -3307,8 +3309,22 @@ unsigned long wait_task_inactive(struct +@@ -3301,8 +3303,24 @@ unsigned long wait_task_inactive(struct * is actually now running somewhere else! 
 */
 while (task_running(rq, p)) {
- if (match_state && unlikely(READ_ONCE(p->__state) != match_state))
- return 0;
++
+ if (match_state) {
++ bool mismatch = false;
++#ifndef CONFIG_PREEMPT_RT
++ if (READ_ONCE(p->__state) != match_state)
++ mismatch = true;
++#else
+ unsigned long flags;
-+ bool missmatch = false;
+
+ raw_spin_lock_irqsave(&p->pi_lock, flags);
-+#ifdef CONFIG_PREEMPT_RT
-+ if ((READ_ONCE(p->__state) != match_state) &&
-+ (READ_ONCE(p->saved_state) != match_state))
-+#else
-+ if (READ_ONCE(p->__state) != match_state)
-+#endif
-+ missmatch = true;
++ if (READ_ONCE(p->__state) != match_state &&
++ READ_ONCE(p->saved_state) != match_state)
++ mismatch = true;
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
-+ if (missmatch)
++#endif
++ if (mismatch)
+ return 0;
+ }
 cpu_relax();
 }
-@@ -3322,7 +3338,21 @@ unsigned long wait_task_inactive(struct
+@@ -3316,7 +3334,21 @@ unsigned long wait_task_inactive(struct
 running = task_running(rq, p);
 queued = task_on_rq_queued(p);
 ncsw = 0;
@@ -75,7 +84,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
 task_rq_unlock(rq, p, &rf);
-@@ -3352,7 +3382,7 @@ unsigned long wait_task_inactive(struct
+@@ -3346,7 +3378,7 @@ unsigned long wait_task_inactive(struct
 * running right now), it's preempted, and we should
 * yield - it could be a while.
*/ diff --git a/debian/patches-rt/sched__Add_support_for_lazy_preemption.patch b/debian/patches-rt/sched__Add_support_for_lazy_preemption.patch index d785b8750..b5e342d82 100644 --- a/debian/patches-rt/sched__Add_support_for_lazy_preemption.patch +++ b/debian/patches-rt/sched__Add_support_for_lazy_preemption.patch @@ -1,7 +1,7 @@ Subject: sched: Add support for lazy preemption From: Thomas Gleixner <tglx@linutronix.de> Date: Fri Oct 26 18:50:54 2012 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Thomas Gleixner <tglx@linutronix.de> @@ -178,7 +178,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/include/linux/sched.h +++ b/include/linux/sched.h -@@ -2023,6 +2023,43 @@ static inline int test_tsk_need_resched( +@@ -2026,6 +2026,43 @@ static inline int test_tsk_need_resched( return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } @@ -295,7 +295,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -1065,6 +1065,46 @@ void resched_curr(struct rq *rq) +@@ -1052,6 +1052,46 @@ void resched_curr(struct rq *rq) trace_sched_wake_idle_without_ipi(cpu); } @@ -342,7 +342,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> void resched_cpu(int cpu) { struct rq *rq = cpu_rq(cpu); -@@ -2239,6 +2279,7 @@ void migrate_disable(void) +@@ -2233,6 +2273,7 @@ void migrate_disable(void) preempt_disable(); this_rq()->nr_pinned++; p->migration_disabled = 1; @@ -350,7 +350,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_enable(); } EXPORT_SYMBOL_GPL(migrate_disable); -@@ -2270,6 +2311,7 @@ void migrate_enable(void) +@@ -2264,6 +2305,7 @@ void migrate_enable(void) barrier(); p->migration_disabled = 0; this_rq()->nr_pinned--; @@ -358,7 +358,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_enable(); } 
EXPORT_SYMBOL_GPL(migrate_enable); -@@ -4544,6 +4586,9 @@ int sched_fork(unsigned long clone_flags +@@ -4586,6 +4628,9 @@ int sched_fork(unsigned long clone_flags p->on_cpu = 0; #endif init_task_preempt_count(p); @@ -368,7 +368,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_SMP plist_node_init(&p->pushable_tasks, MAX_PRIO); RB_CLEAR_NODE(&p->pushable_dl_tasks); -@@ -6381,6 +6426,7 @@ static void __sched notrace __schedule(u +@@ -6453,6 +6498,7 @@ static void __sched notrace __schedule(u next = pick_next_task(rq, prev, &rf); clear_tsk_need_resched(prev); @@ -376,7 +376,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> clear_preempt_need_resched(); #ifdef CONFIG_SCHED_DEBUG rq->last_seen_need_resched_ns = 0; -@@ -6591,6 +6637,30 @@ static void __sched notrace preempt_sche +@@ -6663,6 +6709,30 @@ static void __sched notrace preempt_sche } while (need_resched()); } @@ -407,7 +407,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> #ifdef CONFIG_PREEMPTION /* * This is the entry point to schedule() from in-kernel preemption -@@ -6604,6 +6674,8 @@ asmlinkage __visible void __sched notrac +@@ -6676,6 +6746,8 @@ asmlinkage __visible void __sched notrac */ if (likely(!preemptible())) return; @@ -416,7 +416,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> preempt_schedule_common(); } NOKPROBE_SYMBOL(preempt_schedule); -@@ -6651,6 +6723,9 @@ asmlinkage __visible void __sched notrac +@@ -6723,6 +6795,9 @@ asmlinkage __visible void __sched notrac if (likely(!preemptible())) return; @@ -426,7 +426,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> do { /* * Because the function tracer can trace preempt_count_sub() -@@ -8868,7 +8943,9 @@ void __init init_idle(struct task_struct +@@ -8951,7 +9026,9 @@ void __init init_idle(struct task_struct /* Set the preempt count _outside_ the spinlocks! 
*/ init_idle_preempt_count(idle, cpu); @@ -439,7 +439,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> */ --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c -@@ -4481,7 +4481,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4492,7 +4492,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq ideal_runtime = sched_slice(cfs_rq, curr); delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; if (delta_exec > ideal_runtime) { @@ -448,7 +448,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * The current task ran long enough, ensure it doesn't get * re-elected due to buddy favours. -@@ -4505,7 +4505,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq +@@ -4516,7 +4516,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq return; if (delta > ideal_runtime) @@ -457,7 +457,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void -@@ -4651,7 +4651,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc +@@ -4662,7 +4662,7 @@ entity_tick(struct cfs_rq *cfs_rq, struc * validating it and just reschedule. 
*/ if (queued) { @@ -466,7 +466,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return; } /* -@@ -4800,7 +4800,7 @@ static void __account_cfs_rq_runtime(str +@@ -4811,7 +4811,7 @@ static void __account_cfs_rq_runtime(str * hierarchy can be throttled */ if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr)) @@ -475,7 +475,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static __always_inline -@@ -5563,7 +5563,7 @@ static void hrtick_start_fair(struct rq +@@ -5574,7 +5574,7 @@ static void hrtick_start_fair(struct rq if (delta < 0) { if (task_current(rq, p)) @@ -484,7 +484,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> return; } hrtick_start(rq, delta); -@@ -7213,7 +7213,7 @@ static void check_preempt_wakeup(struct +@@ -7181,7 +7181,7 @@ static void check_preempt_wakeup(struct return; preempt: @@ -493,7 +493,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * Only set the backward buddy when the current task is still * on the rq. This can happen when a wakeup gets interleaved -@@ -11251,7 +11251,7 @@ static void task_fork_fair(struct task_s +@@ -11220,7 +11220,7 @@ static void task_fork_fair(struct task_s * 'current' within the tree based on its new key value. 
*/ swap(curr->vruntime, se->vruntime); @@ -502,7 +502,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } se->vruntime -= cfs_rq->min_vruntime; -@@ -11278,7 +11278,7 @@ prio_changed_fair(struct rq *rq, struct +@@ -11247,7 +11247,7 @@ prio_changed_fair(struct rq *rq, struct */ if (task_current(rq, p)) { if (p->prio > oldprio) @@ -525,7 +525,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h -@@ -2302,6 +2302,15 @@ extern void reweight_task(struct task_st +@@ -2315,6 +2315,15 @@ extern void reweight_task(struct task_st extern void resched_curr(struct rq *rq); extern void resched_cpu(int cpu); @@ -543,7 +543,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c -@@ -2624,11 +2624,19 @@ unsigned int tracing_gen_ctx_irq_test(un +@@ -2625,11 +2625,19 @@ unsigned int tracing_gen_ctx_irq_test(un if (softirq_count() >> (SOFTIRQ_SHIFT + 1)) trace_flags |= TRACE_FLAG_BH_OFF; @@ -565,7 +565,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> (min_t(unsigned int, migration_disable_value(), 0xf)) << 4; } -@@ -4214,15 +4222,17 @@ unsigned long trace_total_entries(struct +@@ -4215,15 +4223,17 @@ unsigned long trace_total_entries(struct static void print_lat_help_header(struct seq_file *m) { @@ -592,7 +592,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } static void print_event_info(struct array_buffer *buf, struct seq_file *m) -@@ -4256,14 +4266,16 @@ static void print_func_help_header_irq(s +@@ -4257,14 +4267,16 @@ static void print_func_help_header_irq(s print_event_info(buf, m); diff --git a/debian/patches-rt/series b/debian/patches-rt/series index ad3ab10f3..0ebe886e0 100644 --- a/debian/patches-rt/series +++ b/debian/patches-rt/series @@ -3,19 +3,7 @@ ########################################################################### # John's printk queue 
########################################################################### -0001-printk-rename-cpulock-functions.patch -0002-printk-cpu-sync-always-disable-interrupts.patch -0004-printk-wake-up-all-waiters.patch -0006-printk-get-caller_id-timestamp-after-migration-disab.patch -0007-printk-call-boot_delay_msec-in-printk_delay.patch -0008-printk-add-con_printk-macro-for-console-details.patch -0009-printk-refactor-and-rework-printing-logic.patch -0010-printk-move-buffer-definitions-into-console_emit_nex.patch -0011-printk-add-pr_flush.patch -0012-printk-add-functions-to-prefer-direct-printing.patch -0013-printk-add-kthread-console-printers.patch -0014-printk-extend-console_lock-for-per-console-locking.patch -0015-printk-remove-console_locked.patch +printk-reinstate-deferred-printing-with-kthreads.patch 0016-printk-add-infrastucture-for-atomic-consoles.patch 0017-serial-8250-implement-write_atomic.patch 0018-printk-avoid-preempt_disable-for-PREEMPT_RT.patch @@ -23,43 +11,18 @@ ########################################################################### # Posted and applied ########################################################################### +arch-Disable-softirq-stacks-on-PREEMPT_RT.patch +blk-mq-Don-t-disable-preemption-around-__blk_mq_run_.patch -# in -tip or traveling to -tip. 
-lib-irq_poll-Prevent-softirq-pending-leak-in-irq_pol.patch # signal_x86__Delay_calling_signals_in_atomic.patch -# v5, fixed 3/3 of smp: Make softirq handling RT friendly -0001-sched-Fix-missing-prototype-warnings.patch -0002-smp-Rename-flush_smp_call_function_from_idle.patch -0003-smp-Make-softirq-handling-RT-safe-in-flush_smp_call_.patch - ########################################################################### # Posted ########################################################################### -rcu-tasks-Use-rcuwait-for-the-rcu_tasks_kthread.patch -rcu-tasks-Use-schedule_hrtimeout_range-while-waiting.patch -blk-mq-Don-t-disable-preemption-around-__blk_mq_run_.patch -mm-vmalloc-Use-raw_cpu_ptr-for-vmap_block_queue-acce.patch -SUNRPC-Don-t-disable-preemption-while-calling-svc_po.patch -0001-scsi-fcoe-Add-a-local_lock-to-fcoe_percpu.patch -0002-scsi-fcoe-Use-per-CPU-API-to-update-per-CPU-statisti.patch -0003-scsi-libfc-Remove-get_cpu-semantics-in-fc_exch_em_al.patch -0004-scsi-bnx2fc-Avoid-using-get_cpu-in-bnx2fc_cmd_alloc.patch -genirq-irq_sim-Make-the-irq_work-always-run-in-hard-.patch genirq-Provide-generic_handle_domain_irq_safe.patch -# Eric's ptrace, v4 -0001-signal-Rename-send_signal-send_signal_locked.patch -0002-signal-Replace-__group_send_sig_info-with-send_signa.patch -0005-ptrace-Remove-arch_ptrace_attach.patch -0006-signal-Use-lockdep_assert_held-instead-of-assert_spi.patch -0008-ptrace-Document-that-wait_task_inactive-can-t-fail.patch -0009-ptrace-Admit-ptrace_stop-can-generate-spuriuos-SIGTR.patch -0010-ptrace-Don-t-change-__state.patch -0011-ptrace-Always-take-siglock-in-ptrace_resume.patch -0012-sched-signal-ptrace-Rework-TASK_TRACED-TASK_STOPPED-.patch -# Hacks to get it to work. -signal__Revert_ptrace_preempt_magic.patch +# Hacks to get ptrace to work. 
+signal-Don-t-disable-preemption-in-ptrace_stop-on-PR.patch sched-Consider-task_struct-saved_state-in-wait_task_.patch ########################################################################### @@ -69,8 +32,10 @@ sched-Consider-task_struct-saved_state-in-wait_task_.patch ########################################################################### # fs: ########################################################################### -fs_dcache__disable_preemption_on_i_dir_seqs_write_side.patch -fs-dcache-Delay-dentry-d_lock-outside-of-the-locked-.patch +0001-fs-dcache-Disable-preemption-on-i_dir_seq-write-side.patch +0002-fs-dcache-Split-__d_lookup_done.patch +0003-fs-dcache-Use-__d_lookup_unhash-in-__d_add-move.patch +0004-fs-dcache-Move-wakeup-out-of-i_seq_dir-write-held-re.patch ########################################################################### # X86: @@ -86,8 +51,6 @@ rcutorture-Also-force-sched-priority-to-timersd-on-b.patch tick-Fix-timer-storm-since-introduction-of-timersd.patch tpm_tis__fix_stall_after_iowrites.patch drivers_block_zram__Replace_bit_spinlocks_with_rtmutex_for_-rt.patch -generic-softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch -softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch iio-adc-stm32-adc-Use-generic_handle_domain_irq.patch locking-lockdep-Remove-lockdep_init_map_crosslock.patch @@ -118,11 +81,8 @@ arch_arm64__Add_lazy_preempt_support.patch ########################################################################### # ARM/ARM64 ########################################################################### -jump-label__disable_if_stop_machine_is_used.patch +0001-arm-Disable-jump-label-on-PREEMPT_RT.patch ARM__enable_irq_in_translation_section_permission_fault_handlers.patch -arm64-mm-Make-arch_faults_on_old_pte-check-for-migra.patch -arm64-sve-Delay-freeing-memory-in-fpsimd_flush_threa.patch -arm64-sve-Make-kernel-FPU-protection-RT-friendly.patch # arm64-signal-Use-ARCH_RT_DELAYS_SIGNAL_SEND.patch 
tty_serial_omap__Make_the_locking_RT_aware.patch tty_serial_pl011__Make_the_locking_work_on_RT.patch diff --git a/debian/patches-rt/signal-Don-t-disable-preemption-in-ptrace_stop-on-PR.patch b/debian/patches-rt/signal-Don-t-disable-preemption-in-ptrace_stop-on-PR.patch new file mode 100644 index 000000000..e221a3703 --- /dev/null +++ b/debian/patches-rt/signal-Don-t-disable-preemption-in-ptrace_stop-on-PR.patch @@ -0,0 +1,59 @@ +From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +Date: Wed, 22 Jun 2022 11:36:17 +0200 +Subject: [PATCH] signal: Don't disable preemption in ptrace_stop() on + PREEMPT_RT. +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz + +Commit + 53da1d9456fe7 ("fix ptrace slowness") + +is just band aid around the problem. +The invocation of do_notify_parent_cldstop() wakes the parent and makes +it runnable. The scheduler then wants to replace this still running task +with the parent. With the read_lock() acquired this is not possible +because preemption is disabled and so this is deferred until +read_unlock(). This scheduling point is undesired and is avoided by +disabling preemption around the unlock operation enabled again before +the schedule() invocation without a preemption point. +This is only undesired because the parent sleeps a cycle in +wait_task_inactive() until the traced task leaves the run-queue in +schedule(). It is not a correctness issue, it is just band aid to avoid the +visible delay which sums up over multiple invocations. +The task can still be preempted if an interrupt occurs between +preempt_enable_no_resched() and freezable_schedule() because on the IRQ-exit +path of the interrupt scheduling _will_ happen. This is ignored since it does +not happen very often.
+ +On PREEMPT_RT keeping preemption disabled during the invocation of +cgroup_enter_frozen() becomes a problem because the function acquires +css_set_lock which is a sleeping lock on PREEMPT_RT and must not be +acquired with disabled preemption. + +Don't disable preemption on PREEMPT_RT. Remove the TODO regarding adding +read_unlock_no_resched() as there is no need for it and will cause harm. + +Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> +--- + kernel/signal.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/kernel/signal.c ++++ b/kernel/signal.c +@@ -2297,13 +2297,13 @@ static int ptrace_stop(int exit_code, in + /* + * Don't want to allow preemption here, because + * sys_ptrace() needs this task to be inactive. +- * +- * XXX: implement read_unlock_no_resched(). + */ +- preempt_disable(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_disable(); + read_unlock(&tasklist_lock); + cgroup_enter_frozen(); +- preempt_enable_no_resched(); ++ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) ++ preempt_enable_no_resched(); + freezable_schedule(); + cgroup_leave_frozen(true); + diff --git a/debian/patches-rt/signal__Revert_ptrace_preempt_magic.patch b/debian/patches-rt/signal__Revert_ptrace_preempt_magic.patch deleted file mode 100644 index 2e5a16d68..000000000 --- a/debian/patches-rt/signal__Revert_ptrace_preempt_magic.patch +++ /dev/null @@ -1,37 +0,0 @@ -Subject: signal: Revert ptrace preempt magic -From: Thomas Gleixner <tglx@linutronix.de> -Date: Wed Sep 21 19:57:12 2011 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -From: Thomas Gleixner <tglx@linutronix.de> - -Upstream commit '53da1d9456fe7f8 fix ptrace slowness' is nothing more -than a bandaid around the ptrace design trainwreck. It's not a -correctness issue, it's merily a cosmetic bandaid. 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> - - ---- - kernel/signal.c | 8 -------- - 1 file changed, 8 deletions(-) ---- ---- a/kernel/signal.c -+++ b/kernel/signal.c -@@ -2283,16 +2283,8 @@ static int ptrace_stop(int exit_code, in - if (gstop_done && (!current->ptrace || ptrace_reparented(current))) - do_notify_parent_cldstop(current, false, why); - -- /* -- * Don't want to allow preemption here, because -- * sys_ptrace() needs this task to be inactive. -- * -- * XXX: implement read_unlock_no_resched(). -- */ -- preempt_disable(); - read_unlock(&tasklist_lock); - cgroup_enter_frozen(); -- preempt_enable_no_resched(); - freezable_schedule(); - cgroup_leave_frozen(true); - diff --git a/debian/patches-rt/softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch b/debian/patches-rt/softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch deleted file mode 100644 index e1db82fe1..000000000 --- a/debian/patches-rt/softirq-Disable-softirq-stacks-on-PREEMPT_RT.patch +++ /dev/null @@ -1,108 +0,0 @@ -From: Thomas Gleixner <tglx@linutronix.de> -Date: Fri, 24 Sep 2021 17:05:48 +0200 -Subject: [PATCH] */softirq: Disable softirq stacks on PREEMPT_RT -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz - -PREEMPT_RT preempts softirqs and the current implementation avoids -do_softirq_own_stack() and only uses __do_softirq(). - -Disable the unused softirqs stacks on PREEMPT_RT to safe some memory and -ensure that do_softirq_own_stack() is not used which is not expected. - -[bigeasy: commit description.] 
- -Signed-off-by: Thomas Gleixner <tglx@linutronix.de> -Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> ---- - arch/arm/kernel/irq.c | 3 ++- - arch/powerpc/kernel/irq.c | 4 ++++ - arch/sh/kernel/irq.c | 2 ++ - arch/sparc/kernel/irq_64.c | 2 ++ - 4 files changed, 10 insertions(+), 1 deletion(-) - ---- a/arch/arm/kernel/irq.c -+++ b/arch/arm/kernel/irq.c -@@ -70,6 +70,7 @@ static void __init init_irq_stacks(void) - } - } - -+#ifndef CONFIG_PREEMPT_RT - static void ____do_softirq(void *arg) - { - __do_softirq(); -@@ -80,7 +81,7 @@ void do_softirq_own_stack(void) - call_with_stack(____do_softirq, NULL, - __this_cpu_read(irq_stack_ptr)); - } -- -+#endif - #endif - - int arch_show_interrupts(struct seq_file *p, int prec) ---- a/arch/powerpc/kernel/irq.c -+++ b/arch/powerpc/kernel/irq.c -@@ -689,6 +689,7 @@ static inline void check_stack_overflow( - } - } - -+#ifndef CONFIG_PREEMPT_RT - static __always_inline void call_do_softirq(const void *sp) - { - /* Temporarily switch r1 to sp, call __do_softirq() then restore r1. 
*/ -@@ -707,6 +708,7 @@ static __always_inline void call_do_soft - "r11", "r12" - ); - } -+#endif - - static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) - { -@@ -820,10 +822,12 @@ void *mcheckirq_ctx[NR_CPUS] __read_most - void *softirq_ctx[NR_CPUS] __read_mostly; - void *hardirq_ctx[NR_CPUS] __read_mostly; - -+#ifndef CONFIG_PREEMPT_RT - void do_softirq_own_stack(void) - { - call_do_softirq(softirq_ctx[smp_processor_id()]); - } -+#endif - - irq_hw_number_t virq_to_hw(unsigned int virq) - { ---- a/arch/sh/kernel/irq.c -+++ b/arch/sh/kernel/irq.c -@@ -149,6 +149,7 @@ void irq_ctx_exit(int cpu) - hardirq_ctx[cpu] = NULL; - } - -+#ifndef CONFIG_PREEMPT_RT - void do_softirq_own_stack(void) - { - struct thread_info *curctx; -@@ -176,6 +177,7 @@ void do_softirq_own_stack(void) - "r5", "r6", "r7", "r8", "r9", "r15", "t", "pr" - ); - } -+#endif - #else - static inline void handle_one_irq(unsigned int irq) - { ---- a/arch/sparc/kernel/irq_64.c -+++ b/arch/sparc/kernel/irq_64.c -@@ -855,6 +855,7 @@ void __irq_entry handler_irq(int pil, st - set_irq_regs(old_regs); - } - -+#ifndef CONFIG_PREEMPT_RT - void do_softirq_own_stack(void) - { - void *orig_sp, *sp = softirq_stack[smp_processor_id()]; -@@ -869,6 +870,7 @@ void do_softirq_own_stack(void) - __asm__ __volatile__("mov %0, %%sp" - : : "r" (orig_sp)); - } -+#endif - - #ifdef CONFIG_HOTPLUG_CPU - void fixup_irqs(void) diff --git a/debian/patches-rt/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch b/debian/patches-rt/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch index e4b0037c6..fd76b9846 100644 --- a/debian/patches-rt/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch +++ b/debian/patches-rt/softirq-Use-a-dedicated-thread-for-timer-wakeups.patch @@ -1,7 +1,7 @@ From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Wed, 1 Dec 2021 17:41:09 +0100 Subject: [PATCH] softirq: Use a dedicated thread for timer wakeups. 
-Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz A timer/hrtimer softirq is raised in-IRQ context. With threaded interrupts enabled or on PREEMPT_RT this leads to waking the ksoftirqd @@ -46,7 +46,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -623,6 +623,22 @@ extern void __raise_softirq_irqoff(unsig +@@ -605,6 +605,22 @@ extern void __raise_softirq_irqoff(unsig extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); @@ -195,7 +195,7 @@ Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); --- a/kernel/time/timer.c +++ b/kernel/time/timer.c -@@ -1769,7 +1769,7 @@ static void run_local_timers(void) +@@ -1822,7 +1822,7 @@ static void run_local_timers(void) if (time_before(jiffies, base->next_expiry)) return; } diff --git a/debian/patches-rt/sysfs__Add__sys_kernel_realtime_entry.patch b/debian/patches-rt/sysfs__Add__sys_kernel_realtime_entry.patch index 2b4a59c0b..fd21e62e9 100644 --- a/debian/patches-rt/sysfs__Add__sys_kernel_realtime_entry.patch +++ b/debian/patches-rt/sysfs__Add__sys_kernel_realtime_entry.patch @@ -1,7 +1,7 @@ Subject: sysfs: Add /sys/kernel/realtime entry From: Clark Williams <williams@redhat.com> Date: Sat Jul 30 21:55:53 2011 -0500 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Clark Williams <williams@redhat.com> diff --git a/debian/patches-rt/tick-Fix-timer-storm-since-introduction-of-timersd.patch b/debian/patches-rt/tick-Fix-timer-storm-since-introduction-of-timersd.patch index daf9e11ee..bd9ec0def 100644 --- 
a/debian/patches-rt/tick-Fix-timer-storm-since-introduction-of-timersd.patch +++ b/debian/patches-rt/tick-Fix-timer-storm-since-introduction-of-timersd.patch @@ -1,7 +1,7 @@ From: Frederic Weisbecker <frederic@kernel.org> Date: Tue, 5 Apr 2022 03:07:52 +0200 Subject: [PATCH] tick: Fix timer storm since introduction of timersd -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz If timers are pending while the tick is reprogrammed on nohz_mode, the next expiry is not armed to fire now, it is delayed one jiffy forward @@ -48,7 +48,7 @@ Link: https://lkml.kernel.org/r/20220405010752.1347437-2-frederic@kernel.org --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h -@@ -625,9 +625,16 @@ extern void raise_softirq(unsigned int n +@@ -607,9 +607,16 @@ extern void raise_softirq(unsigned int n #ifdef CONFIG_PREEMPT_RT DECLARE_PER_CPU(struct task_struct *, timersd); @@ -65,7 +65,7 @@ Link: https://lkml.kernel.org/r/20220405010752.1347437-2-frederic@kernel.org #else static inline void raise_timer_softirq(void) { -@@ -638,6 +645,11 @@ static inline void raise_hrtimer_softirq +@@ -620,6 +627,11 @@ static inline void raise_hrtimer_softirq { raise_softirq_irqoff(HRTIMER_SOFTIRQ); } diff --git a/debian/patches-rt/tpm_tis__fix_stall_after_iowrites.patch b/debian/patches-rt/tpm_tis__fix_stall_after_iowrites.patch index cee91cadd..128ec7323 100644 --- a/debian/patches-rt/tpm_tis__fix_stall_after_iowrites.patch +++ b/debian/patches-rt/tpm_tis__fix_stall_after_iowrites.patch @@ -1,7 +1,7 @@ Subject: tpm_tis: fix stall after iowrite*()s From: Haris Okanovic <haris.okanovic@ni.com> Date: Tue Aug 15 15:13:08 2017 -0500 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Haris Okanovic 
<haris.okanovic@ni.com> @@ -63,21 +63,18 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> static int interrupts = -1; module_param(interrupts, int, 0444); MODULE_PARM_DESC(interrupts, "Enable interrupts"); -@@ -169,7 +194,7 @@ static int tpm_tcg_write_bytes(struct tp - struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); +@@ -185,12 +210,12 @@ static int tpm_tcg_write_bytes(struct tp + switch (io_mode) { + case TPM_TIS_PHYS_8: + while (len--) +- iowrite8(*value++, phy->iobase + addr); ++ tpm_tis_iowrite8(*value++, phy->iobase, addr); + break; + case TPM_TIS_PHYS_16: + return -EINVAL; + case TPM_TIS_PHYS_32: +- iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase + addr); ++ tpm_tis_iowrite32(le32_to_cpu(*((__le32 *)value)), phy->iobase, addr); + break; + } - while (len--) -- iowrite8(*value++, phy->iobase + addr); -+ tpm_tis_iowrite8(*value++, phy->iobase, addr); - - return 0; - } -@@ -196,7 +221,7 @@ static int tpm_tcg_write32(struct tpm_ti - { - struct tpm_tis_tcg_phy *phy = to_tpm_tis_tcg_phy(data); - -- iowrite32(value, phy->iobase + addr); -+ tpm_tis_iowrite32(value, phy->iobase, addr); - - return 0; - } diff --git a/debian/patches-rt/tty_serial_omap__Make_the_locking_RT_aware.patch b/debian/patches-rt/tty_serial_omap__Make_the_locking_RT_aware.patch index e2c87daa3..fccc49326 100644 --- a/debian/patches-rt/tty_serial_omap__Make_the_locking_RT_aware.patch +++ b/debian/patches-rt/tty_serial_omap__Make_the_locking_RT_aware.patch @@ -1,7 +1,7 @@ Subject: tty/serial/omap: Make the locking RT aware From: Thomas Gleixner <tglx@linutronix.de> Date: Thu Jul 28 13:32:57 2011 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Thomas Gleixner <tglx@linutronix.de> diff --git a/debian/patches-rt/tty_serial_pl011__Make_the_locking_work_on_RT.patch 
b/debian/patches-rt/tty_serial_pl011__Make_the_locking_work_on_RT.patch index dac908716..c36b006c1 100644 --- a/debian/patches-rt/tty_serial_pl011__Make_the_locking_work_on_RT.patch +++ b/debian/patches-rt/tty_serial_pl011__Make_the_locking_work_on_RT.patch @@ -1,7 +1,7 @@ Subject: tty/serial/pl011: Make the locking work on RT From: Thomas Gleixner <tglx@linutronix.de> Date: Tue Jan 8 21:36:51 2013 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Thomas Gleixner <tglx@linutronix.de> @@ -17,7 +17,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c -@@ -2275,18 +2275,24 @@ pl011_console_write(struct console *co, +@@ -2289,18 +2289,24 @@ pl011_console_write(struct console *co, { struct uart_amba_port *uap = amba_ports[co->index]; unsigned int old_cr = 0, new_cr; @@ -46,7 +46,7 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> /* * First save the CR then disable the interrupts -@@ -2312,8 +2318,7 @@ pl011_console_write(struct console *co, +@@ -2326,8 +2332,7 @@ pl011_console_write(struct console *co, pl011_write(old_cr, uap, REG_CR); if (locked) diff --git a/debian/patches-rt/x86__Allow_to_enable_RT.patch b/debian/patches-rt/x86__Allow_to_enable_RT.patch index 95027791b..7effbe598 100644 --- a/debian/patches-rt/x86__Allow_to_enable_RT.patch +++ b/debian/patches-rt/x86__Allow_to_enable_RT.patch @@ -1,7 +1,7 @@ Subject: x86: Allow to enable RT From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Wed Aug 7 18:15:38 2019 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> diff --git 
a/debian/patches-rt/x86__Enable_RT_also_on_32bit.patch b/debian/patches-rt/x86__Enable_RT_also_on_32bit.patch index 66f6cb6dd..232533fbd 100644 --- a/debian/patches-rt/x86__Enable_RT_also_on_32bit.patch +++ b/debian/patches-rt/x86__Enable_RT_also_on_32bit.patch @@ -1,7 +1,7 @@ Subject: x86: Enable RT also on 32bit From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Thu Nov 7 17:49:20 2019 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> diff --git a/debian/patches-rt/x86__Support_for_lazy_preemption.patch b/debian/patches-rt/x86__Support_for_lazy_preemption.patch index 5d54cd65a..8835b9b31 100644 --- a/debian/patches-rt/x86__Support_for_lazy_preemption.patch +++ b/debian/patches-rt/x86__Support_for_lazy_preemption.patch @@ -1,7 +1,7 @@ Subject: x86: Support for lazy preemption From: Thomas Gleixner <tglx@linutronix.de> Date: Thu Nov 1 11:03:47 2012 +0100 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Thomas Gleixner <tglx@linutronix.de> @@ -20,12 +20,12 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig -@@ -240,6 +240,7 @@ config X86 +@@ -245,6 +245,7 @@ config X86 select HAVE_PCI select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_LAZY - select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT + select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_REGS_AND_STACK_ACCESS_API --- a/arch/x86/include/asm/preempt.h @@ -99,23 +99,23 @@ Signed-off-by: Thomas Gleixner <tglx@linutronix.de> } #else /* !__ASSEMBLY__ */ -@@ -93,6 +96,7 @@ struct thread_info { +@@ -92,6 +95,7 @@ struct thread_info { 
+ #define TIF_NOCPUID 15 /* CPUID is not accessible in userland */ #define TIF_NOTSC 16 /* TSC is not accessible in userland */ #define TIF_NOTIFY_SIGNAL 17 /* signal notifications exist */ - #define TIF_SLD 18 /* Restore split lock detection on context switch */ +#define TIF_NEED_RESCHED_LAZY 19 /* lazy rescheduling necessary */ #define TIF_MEMDIE 20 /* is terminating due to OOM killer */ #define TIF_POLLING_NRFLAG 21 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ -@@ -117,6 +121,7 @@ struct thread_info { +@@ -115,6 +119,7 @@ struct thread_info { + #define _TIF_NOCPUID (1 << TIF_NOCPUID) #define _TIF_NOTSC (1 << TIF_NOTSC) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) - #define _TIF_SLD (1 << TIF_SLD) +#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY) #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) -@@ -148,6 +153,8 @@ struct thread_info { +@@ -146,6 +151,8 @@ struct thread_info { #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/debian/patches-rt/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch b/debian/patches-rt/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch index b9d835a0d..cb1849612 100644 --- a/debian/patches-rt/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch +++ b/debian/patches-rt/x86_entry__Use_should_resched_in_idtentry_exit_cond_resched.patch @@ -1,7 +1,7 @@ Subject: x86/entry: Use should_resched() in idtentry_exit_cond_resched() From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Date: Tue Jun 30 11:45:14 2020 +0200 -Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.18/older/patches-5.18-rt11.tar.xz +Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.19/older/patches-5.19-rc3-rt5.tar.xz From: Sebastian Andrzej Siewior <bigeasy@linutronix.de> diff --git 
a/debian/patches/bugfix/all/firmware-remove-redundant-log-messages-from-drivers.patch b/debian/patches/bugfix/all/firmware-remove-redundant-log-messages-from-drivers.patch index c842d98b3..f6a652b21 100644 --- a/debian/patches/bugfix/all/firmware-remove-redundant-log-messages-from-drivers.patch +++ b/debian/patches/bugfix/all/firmware-remove-redundant-log-messages-from-drivers.patch @@ -23,20 +23,6 @@ upstream submission. ret = UCODE_ERROR; if (!verify_container(fw->data, fw->size, false)) ---- a/drivers/atm/ambassador.c -+++ b/drivers/atm/ambassador.c -@@ -1914,10 +1914,8 @@ static int ucode_init(loader_block *lb, - int res; - - res = request_ihex_firmware(&fw, "atmsar11.fw", &dev->pci_dev->dev); -- if (res) { -- PRINTK (KERN_ERR, "Cannot load microcode data"); -+ if (res) - return res; -- } - - /* First record contains just the start address */ - rec = (const struct ihex_binrec *)fw->data; --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -2398,10 +2398,9 @@ static int fore200e_load_and_start_fw(st @@ -161,7 +147,7 @@ upstream submission. fw->size, fw_name); --- a/drivers/dma/imx-sdma.c +++ b/drivers/dma/imx-sdma.c -@@ -1816,11 +1816,8 @@ static void sdma_load_firmware(const str +@@ -1882,11 +1882,8 @@ static void sdma_load_firmware(const str const struct sdma_script_start_addrs *addr; unsigned short *ram_code; @@ -858,7 +844,7 @@ upstream submission. if (!buf) { --- a/drivers/media/usb/dvb-usb/opera1.c +++ b/drivers/media/usb/dvb-usb/opera1.c -@@ -450,8 +450,6 @@ static int opera1_xilinx_load_firmware(s +@@ -455,8 +455,6 @@ static int opera1_xilinx_load_firmware(s info("start downloading fpga firmware %s",filename); if ((ret = request_firmware(&fw, filename, &dev->dev)) != 0) { @@ -1235,7 +1221,7 @@ upstream submission. 
@@ -580,8 +580,6 @@ static int myri10ge_load_hotplug_firmwar unsigned i; - if ((status = request_firmware(&fw, mgp->fw_name, dev)) < 0) { + if (request_firmware(&fw, mgp->fw_name, dev) < 0) { - dev_err(dev, "Unable to load %s firmware image via hotplug\n", - mgp->fw_name); status = -EINVAL; @@ -1301,7 +1287,7 @@ upstream submission. dev_err(&kaweth->intf->dev, "Firmware too big: %zu\n", --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c -@@ -1223,9 +1223,6 @@ static void ath9k_hif_usb_firmware_cb(co +@@ -1222,9 +1222,6 @@ static void ath9k_hif_usb_firmware_cb(co if (!ret) return; @@ -1434,7 +1420,7 @@ upstream submission. --- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c +++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c -@@ -3396,10 +3396,8 @@ static int ipw_get_fw(struct ipw_priv *p +@@ -3393,10 +3393,8 @@ static int ipw_get_fw(struct ipw_priv *p /* ask firmware_class module to get the boot firmware off disk */ rc = request_firmware(raw, name, &priv->pci_dev->dev); @@ -1684,7 +1670,7 @@ upstream submission. wl1251_error("nvs size is not multiple of 32 bits: %zu", --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c -@@ -764,10 +764,8 @@ static int wl12xx_fetch_firmware(struct +@@ -756,10 +756,8 @@ static int wl12xx_fetch_firmware(struct ret = request_firmware(&fw, fw_name, wl->dev); @@ -1878,7 +1864,7 @@ upstream submission. if (qla82xx_validate_firmware_blob(vha, --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c -@@ -7558,8 +7558,6 @@ qla2x00_request_firmware(scsi_qla_host_t +@@ -7557,8 +7557,6 @@ qla2x00_request_firmware(scsi_qla_host_t goto out; if (request_firmware(&blob->fw, blob->name, &ha->pdev->dev)) { @@ -1961,7 +1947,7 @@ upstream submission. 
--- a/drivers/tty/serial/icom.c +++ b/drivers/tty/serial/icom.c -@@ -362,7 +362,6 @@ static void load_code(struct icom_port * +@@ -621,7 +621,6 @@ static void load_code(struct icom_port * /* Load Call Setup into Adapter */ if (request_firmware(&fw, "icom_call_setup.bin", &dev->dev) < 0) { @@ -1969,7 +1955,7 @@ upstream submission. status = -1; goto load_code_exit; } -@@ -382,7 +381,6 @@ static void load_code(struct icom_port * +@@ -641,7 +640,6 @@ static void load_code(struct icom_port * /* Load Resident DCE portion of Adapter */ if (request_firmware(&fw, "icom_res_dce.bin", &dev->dev) < 0) { @@ -1977,7 +1963,7 @@ upstream submission. status = -1; goto load_code_exit; } -@@ -427,7 +425,6 @@ static void load_code(struct icom_port * +@@ -686,7 +684,6 @@ static void load_code(struct icom_port * } if (request_firmware(&fw, "icom_asc.bin", &dev->dev) < 0) { @@ -2156,7 +2142,7 @@ upstream submission. BootMajorVersion = rec->data[0]; --- a/drivers/usb/serial/io_ti.c +++ b/drivers/usb/serial/io_ti.c -@@ -1006,8 +1006,6 @@ static int download_fw(struct edgeport_s +@@ -1008,8 +1008,6 @@ static int download_fw(struct edgeport_s status = request_firmware(&fw, fw_name, dev); if (status) { @@ -2195,7 +2181,7 @@ upstream submission. if ((fw_entry->size < 8*1024) || (fw_entry->size > 64*1024)) { --- a/drivers/video/fbdev/metronomefb.c +++ b/drivers/video/fbdev/metronomefb.c -@@ -680,10 +680,8 @@ static int metronomefb_probe(struct plat +@@ -679,10 +679,8 @@ static int metronomefb_probe(struct plat a) request the waveform file from userspace b) process waveform and decode into metromem */ retval = request_firmware(&fw_entry, "metronome.wbf", &dev->dev); @@ -2270,7 +2256,7 @@ upstream submission. 
snd_printk(KERN_INFO "sscape: MIDI firmware loaded %zu KBs\n", --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c -@@ -1971,10 +1971,8 @@ wavefront_download_firmware (snd_wavefro +@@ -1972,10 +1972,8 @@ wavefront_download_firmware (snd_wavefro const struct firmware *firmware; err = request_firmware(&firmware, path, dev->card->dev); @@ -2421,7 +2407,7 @@ upstream submission. "too short firmware size %d (expected %d)\n", --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c -@@ -894,10 +894,8 @@ static int wm2000_i2c_probe(struct i2c_c +@@ -892,10 +892,8 @@ static int wm2000_i2c_probe(struct i2c_c } ret = request_firmware(&fw, filename, &i2c->dev); diff --git a/debian/patches/bugfix/all/kbuild-fix-recordmcount-dependency.patch b/debian/patches/bugfix/all/kbuild-fix-recordmcount-dependency.patch index 1a65b3869..8f9c21f56 100644 --- a/debian/patches/bugfix/all/kbuild-fix-recordmcount-dependency.patch +++ b/debian/patches/bugfix/all/kbuild-fix-recordmcount-dependency.patch @@ -9,7 +9,7 @@ sources. --- a/scripts/Makefile.build +++ b/scripts/Makefile.build -@@ -223,6 +223,11 @@ cmd_record_mcount = $(if $(findstring $( +@@ -210,6 +210,11 @@ cmd_record_mcount = $(if $(findstring $( $(sub_cmd_record_mcount)) endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT @@ -18,6 +18,6 @@ sources. 
+recordmcount_source := +endif + - ifdef CONFIG_STACK_VALIDATION - - objtool := $(objtree)/tools/objtool/objtool + # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory + # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file + # 'OBJECT_FILES_NON_STANDARD_foo.o := 'n': override directory skip for a file diff --git a/debian/patches/bugfix/all/module-disable-matching-missing-version-crc.patch b/debian/patches/bugfix/all/module-disable-matching-missing-version-crc.patch index 2ab62688a..8e0a7ee76 100644 --- a/debian/patches/bugfix/all/module-disable-matching-missing-version-crc.patch +++ b/debian/patches/bugfix/all/module-disable-matching-missing-version-crc.patch @@ -7,9 +7,9 @@ This partly reverts commit cd3caefb4663e3811d37cc2afad3cce642d60061. We want to fail closed if a symbol version CRC is missing, as the alternative may allow subverting module signing. --- ---- a/kernel/module.c -+++ b/kernel/module.c -@@ -1313,9 +1313,8 @@ static int check_version(const struct lo +--- a/kernel/module/version.c ++++ b/kernel/module/version.c +@@ -46,9 +46,8 @@ int check_version(const struct load_info goto bad_version; } @@ -20,4 +20,4 @@ alternative may allow subverting module signing. + return 0; bad_version: - pr_warn("%s: disagrees about version of symbol %s\n", + pr_warn("%s: disagrees about version of symbol %s\n", info->name, symname); diff --git a/debian/patches/bugfix/all/radeon-amdgpu-firmware-is-required-for-drm-and-kms-on-r600-onward.patch b/debian/patches/bugfix/all/radeon-amdgpu-firmware-is-required-for-drm-and-kms-on-r600-onward.patch index 4ff66b31c..676ddfa18 100644 --- a/debian/patches/bugfix/all/radeon-amdgpu-firmware-is-required-for-drm-and-kms-on-r600-onward.patch +++ b/debian/patches/bugfix/all/radeon-amdgpu-firmware-is-required-for-drm-and-kms-on-r600-onward.patch @@ -30,16 +30,16 @@ is missing, except for the pre-R600 case. 
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c -@@ -39,6 +39,8 @@ +@@ -38,6 +38,8 @@ + #include <linux/mmu_notifier.h> #include <linux/suspend.h> #include <linux/cc_platform.h> - #include <linux/fb.h> +#include <linux/namei.h> +#include <linux/path.h> #include "amdgpu.h" #include "amdgpu_irq.h" -@@ -1992,6 +1994,28 @@ static void amdgpu_get_secondary_funcs(s +@@ -1983,6 +1985,28 @@ static void amdgpu_get_secondary_funcs(s } } @@ -68,9 +68,9 @@ is missing, except for the pre-R600 case. static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { -@@ -2072,6 +2096,11 @@ static int amdgpu_pci_probe(struct pci_d - size = pci_resource_len(pdev, 0); - is_fw_fb = amdgpu_is_fw_framebuffer(base, size); +@@ -2057,6 +2081,11 @@ static int amdgpu_pci_probe(struct pci_d + } + #endif + if (!amdgpu_firmware_installed()) { + DRM_ERROR("amdgpu requires firmware installed\n"); diff --git a/debian/patches/bugfix/x86/retbleed/0001-x86-kvm-vmx-Make-noinstr-clean.patch b/debian/patches/bugfix/x86/retbleed/0001-x86-kvm-vmx-Make-noinstr-clean.patch new file mode 100644 index 000000000..e2f1c4b9e --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0001-x86-kvm-vmx-Make-noinstr-clean.patch @@ -0,0 +1,74 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:32 +0200 +Subject: x86/kvm/vmx: Make noinstr clean +Origin: https://git.kernel.org/linus/742ab6df974ae8384a2dd213db1a3a06cf6d8936 + +The recent mmio_stale_data fixes broke the noinstr constraints: + + vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x15b: call to wrmsrl.constprop.0() leaves .noinstr.text section + vmlinux.o: warning: objtool: vmx_vcpu_enter_exit+0x1bf: call to kvm_arch_has_assigned_device() leaves .noinstr.text section + +make it all happy again. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kvm/vmx/vmx.c | 6 +++--- + arch/x86/kvm/x86.c | 4 ++-- + include/linux/kvm_host.h | 2 +- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index 3a919e49129b..009bbae9ad66 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -383,9 +383,9 @@ static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx) + if (!vmx->disable_fb_clear) + return; + +- rdmsrl(MSR_IA32_MCU_OPT_CTRL, msr); ++ msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL); + msr |= FB_CLEAR_DIS; +- wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); ++ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr); + /* Cache the MSR value to avoid reading it later */ + vmx->msr_ia32_mcu_opt_ctrl = msr; + } +@@ -396,7 +396,7 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx) + return; + + vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS; +- wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); ++ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl); + } + + static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index 1910e1e78b15..26d0cac32f73 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -12631,9 +12631,9 @@ void kvm_arch_end_assignment(struct kvm *kvm) + } + EXPORT_SYMBOL_GPL(kvm_arch_end_assignment); + +-bool kvm_arch_has_assigned_device(struct kvm *kvm) ++bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm) + { +- return atomic_read(&kvm->arch.assigned_device_count); ++ return arch_atomic_read(&kvm->arch.assigned_device_count); + } + EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device); + +diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h +index c20f2d55840c..83cf7fd842e0 100644 +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -1513,7 +1513,7 @@ static inline void 
kvm_arch_end_assignment(struct kvm *kvm) + { + } + +-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm) ++static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm) + { + return false; + } diff --git a/debian/patches/bugfix/x86/retbleed/0002-x86-cpufeatures-Move-RETPOLINE-flags-to-word-11.patch b/debian/patches/bugfix/x86/retbleed/0002-x86-cpufeatures-Move-RETPOLINE-flags-to-word-11.patch new file mode 100644 index 000000000..6a60eea8f --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0002-x86-cpufeatures-Move-RETPOLINE-flags-to-word-11.patch @@ -0,0 +1,43 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:33 +0200 +Subject: x86/cpufeatures: Move RETPOLINE flags to word 11 +Origin: https://git.kernel.org/linus/a883d624aed463c84c22596006e5a96f5b44db31 + +In order to extend the RETPOLINE features to 4, move them to word 11 +where there is still room. This mostly keeps DISABLE_RETPOLINE +simple. + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/cpufeatures.h | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 03acc823838a..4807302a32d9 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -203,8 +203,8 @@ + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ +-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ +-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ ++/* FREE! ( 7*32+12) */ ++/* FREE! 
( 7*32+13) */ + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ + #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ + #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +@@ -296,6 +296,10 @@ + #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ + #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ + #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ ++/* FREE! (11*32+10) */ ++/* FREE! (11*32+11) */ ++#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ ++#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ diff --git a/debian/patches/bugfix/x86/retbleed/0003-x86-retpoline-Cleanup-some-ifdefery.patch b/debian/patches/bugfix/x86/retbleed/0003-x86-retpoline-Cleanup-some-ifdefery.patch new file mode 100644 index 000000000..4903227d0 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0003-x86-retpoline-Cleanup-some-ifdefery.patch @@ -0,0 +1,95 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:34 +0200 +Subject: x86/retpoline: Cleanup some #ifdefery +Origin: https://git.kernel.org/linus/369ae6ffc41a3c1137cab697635a84d0cc7cdcea + +On it's own not much of a cleanup but it prepares for more/similar +code. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/disabled-features.h | 9 ++++++++- + arch/x86/include/asm/nospec-branch.h | 7 +++---- + arch/x86/net/bpf_jit_comp.c | 7 +++---- + 3 files changed, 14 insertions(+), 9 deletions(-) + +diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h +index 36369e76cc63..4061f4907c92 100644 +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -50,6 +50,13 @@ + # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) + #endif + ++#ifdef CONFIG_RETPOLINE ++# define DISABLE_RETPOLINE 0 ++#else ++# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ ++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) ++#endif ++ + #ifdef CONFIG_INTEL_IOMMU_SVM + # define DISABLE_ENQCMD 0 + #else +@@ -82,7 +89,7 @@ + #define DISABLED_MASK8 (DISABLE_TDX_GUEST) + #define DISABLED_MASK9 (DISABLE_SGX) + #define DISABLED_MASK10 0 +-#define DISABLED_MASK11 0 ++#define DISABLED_MASK11 (DISABLE_RETPOLINE) + #define DISABLED_MASK12 0 + #define DISABLED_MASK13 0 + #define DISABLED_MASK14 0 +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index da251a5645b0..5728539a3e77 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -120,17 +120,16 @@ + _ASM_PTR " 999b\n\t" \ + ".popsection\n\t" + +-#ifdef CONFIG_RETPOLINE +- + typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; ++extern retpoline_thunk_t __x86_indirect_thunk_array[]; ++ ++#ifdef CONFIG_RETPOLINE + + #define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_thunk_ ## reg; + #include <asm/GEN-for-each-reg.h> + #undef GEN + +-extern retpoline_thunk_t __x86_indirect_thunk_array[]; +- + #ifdef CONFIG_X86_64 
+ + /* +diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c +index c98b8c0ed3b8..79d26908c143 100644 +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -412,16 +412,15 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) + { + u8 *prog = *pprog; + +-#ifdef CONFIG_RETPOLINE + if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { + EMIT_LFENCE(); + EMIT2(0xFF, 0xE0 + reg); + } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { + OPTIMIZER_HIDE_VAR(reg); + emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); +- } else +-#endif +- EMIT2(0xFF, 0xE0 + reg); ++ } else { ++ EMIT2(0xFF, 0xE0 + reg); ++ } + + *pprog = prog; + } diff --git a/debian/patches/bugfix/x86/retbleed/0004-x86-retpoline-Swizzle-retpoline-thunk.patch b/debian/patches/bugfix/x86/retbleed/0004-x86-retpoline-Swizzle-retpoline-thunk.patch new file mode 100644 index 000000000..4fec65b73 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0004-x86-retpoline-Swizzle-retpoline-thunk.patch @@ -0,0 +1,36 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:35 +0200 +Subject: x86/retpoline: Swizzle retpoline thunk +Origin: https://git.kernel.org/linus/00e1533325fd1fb5459229fe37f235462649f668 + +Put the actual retpoline thunk as the original code so that it can +become more complicated. Specifically, it allows RET to be a JMP, +which can't be .altinstr_replacement since that doesn't do relocations +(except for the very first instruction). 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/lib/retpoline.S | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +index b2b2366885a2..2cdd62499d54 100644 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -33,9 +33,9 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + +- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ +- __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \ +- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE ++ ALTERNATIVE_2 __stringify(RETPOLINE \reg), \ ++ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \ ++ __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE) + + .endm + diff --git a/debian/patches/bugfix/x86/retbleed/0005-x86-retpoline-Use-mfunction-return.patch b/debian/patches/bugfix/x86/retbleed/0005-x86-retpoline-Use-mfunction-return.patch new file mode 100644 index 000000000..340617608 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0005-x86-retpoline-Use-mfunction-return.patch @@ -0,0 +1,77 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:36 +0200 +Subject: x86/retpoline: Use -mfunction-return +Origin: https://git.kernel.org/linus/0b53c374b9eff2255a386f1f1cfb9a928e52a5ae + +Utilize -mfunction-return=thunk-extern when available to have the +compiler replace RET instructions with direct JMPs to the symbol +__x86_return_thunk. This does not affect assembler (.S) sources, only C +sources. + +-mfunction-return=thunk-extern has been available since gcc 7.3 and +clang 15. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Nick Desaulniers <ndesaulniers@google.com> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Tested-by: Nick Desaulniers <ndesaulniers@google.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/Makefile | 2 ++ + arch/x86/include/asm/nospec-branch.h | 2 ++ + arch/x86/lib/retpoline.S | 13 +++++++++++++ + 3 files changed, 17 insertions(+) + +diff --git a/arch/x86/Makefile b/arch/x86/Makefile +index a74886aed349..6e16057737e5 100644 +--- a/arch/x86/Makefile ++++ b/arch/x86/Makefile +@@ -15,11 +15,13 @@ endif + ifdef CONFIG_CC_IS_GCC + RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix) ++RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) + RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) + endif + ifdef CONFIG_CC_IS_CLANG + RETPOLINE_CFLAGS := -mretpoline-external-thunk + RETPOLINE_VDSO_CFLAGS := -mretpoline ++RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) + endif + export RETPOLINE_CFLAGS + export RETPOLINE_VDSO_CFLAGS +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 5728539a3e77..829c9f827a96 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -123,6 +123,8 @@ + typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; + extern retpoline_thunk_t __x86_indirect_thunk_array[]; + ++extern void __x86_return_thunk(void); ++ + #ifdef CONFIG_RETPOLINE + + #define GEN(reg) \ +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +index 2cdd62499d54..4467c21215f4 100644 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -67,3 +67,16 @@ SYM_CODE_END(__x86_indirect_thunk_array) + #define GEN(reg) EXPORT_THUNK(reg) + #include 
<asm/GEN-for-each-reg.h> + #undef GEN ++ ++/* ++ * This function name is magical and is used by -mfunction-return=thunk-extern ++ * for the compiler to generate JMPs to it. ++ */ ++SYM_CODE_START(__x86_return_thunk) ++ UNWIND_HINT_EMPTY ++ ANNOTATE_NOENDBR ++ ret ++ int3 ++SYM_CODE_END(__x86_return_thunk) ++ ++__EXPORT_THUNK(__x86_return_thunk) diff --git a/debian/patches/bugfix/x86/retbleed/0006-x86-Undo-return-thunk-damage.patch b/debian/patches/bugfix/x86/retbleed/0006-x86-Undo-return-thunk-damage.patch new file mode 100644 index 000000000..3b0dbd030 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0006-x86-Undo-return-thunk-damage.patch @@ -0,0 +1,199 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:37 +0200 +Subject: x86: Undo return-thunk damage +Origin: https://git.kernel.org/linus/15e67227c49a57837108acfe1c80570e1bd9f962 + +Introduce X86_FEATURE_RETHUNK for those afflicted with needing this. + + [ bp: Do only INT3 padding - simpler. ] + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/alternative.h | 1 + + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/disabled-features.h | 3 +- + arch/x86/kernel/alternative.c | 60 ++++++++++++++++++++++++ + arch/x86/kernel/module.c | 8 +++- + arch/x86/kernel/vmlinux.lds.S | 7 +++ + 6 files changed, 78 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h +index 9b10c8c76087..9542c582d546 100644 +--- a/arch/x86/include/asm/alternative.h ++++ b/arch/x86/include/asm/alternative.h +@@ -76,6 +76,7 @@ extern int alternatives_patched; + extern void alternative_instructions(void); + extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + extern void apply_retpolines(s32 *start, s32 *end); ++extern void 
apply_returns(s32 *start, s32 *end); + extern void apply_ibt_endbr(s32 *start, s32 *end); + + struct module; +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 4807302a32d9..295e69090fb8 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -300,6 +300,7 @@ + /* FREE! (11*32+11) */ + #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ ++#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ +diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h +index 4061f4907c92..641c479cca17 100644 +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -54,7 +54,8 @@ + # define DISABLE_RETPOLINE 0 + #else + # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ +- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) ++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ ++ (1 << (X86_FEATURE_RETHUNK & 31))) + #endif + + #ifdef CONFIG_INTEL_IOMMU_SVM +diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c +index e257f6c80372..76b745921509 100644 +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -115,6 +115,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) + } + + extern s32 __retpoline_sites[], __retpoline_sites_end[]; ++extern s32 __return_sites[], __return_sites_end[]; + extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; + extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + extern s32 __smp_locks[], __smp_locks_end[]; +@@ -507,9 +508,67 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) + } + } + 
++/* ++ * Rewrite the compiler generated return thunk tail-calls. ++ * ++ * For example, convert: ++ * ++ * JMP __x86_return_thunk ++ * ++ * into: ++ * ++ * RET ++ */ ++static int patch_return(void *addr, struct insn *insn, u8 *bytes) ++{ ++ int i = 0; ++ ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) ++ return -1; ++ ++ bytes[i++] = RET_INSN_OPCODE; ++ ++ for (; i < insn->length;) ++ bytes[i++] = INT3_INSN_OPCODE; ++ ++ return i; ++} ++ ++void __init_or_module noinline apply_returns(s32 *start, s32 *end) ++{ ++ s32 *s; ++ ++ for (s = start; s < end; s++) { ++ void *addr = (void *)s + *s; ++ struct insn insn; ++ int len, ret; ++ u8 bytes[16]; ++ u8 op1; ++ ++ ret = insn_decode_kernel(&insn, addr); ++ if (WARN_ON_ONCE(ret < 0)) ++ continue; ++ ++ op1 = insn.opcode.bytes[0]; ++ if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE)) ++ continue; ++ ++ DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", ++ addr, addr, insn.length, ++ addr + insn.length + insn.immediate.value); ++ ++ len = patch_return(addr, &insn, bytes); ++ if (len == insn.length) { ++ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr); ++ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr); ++ text_poke_early(addr, bytes, len); ++ } ++ } ++} + #else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */ + + void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } ++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } + + #endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */ + +@@ -860,6 +919,7 @@ void __init alternative_instructions(void) + * those can rewrite the retpoline thunks. 
+ */ + apply_retpolines(__retpoline_sites, __retpoline_sites_end); ++ apply_returns(__return_sites, __return_sites_end); + + /* + * Then patch alternatives, such that those paravirt calls that are in +diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c +index b98ffcf4d250..67828d973389 100644 +--- a/arch/x86/kernel/module.c ++++ b/arch/x86/kernel/module.c +@@ -253,7 +253,7 @@ int module_finalize(const Elf_Ehdr *hdr, + { + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, + *para = NULL, *orc = NULL, *orc_ip = NULL, +- *retpolines = NULL, *ibt_endbr = NULL; ++ *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL; + char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { +@@ -271,6 +271,8 @@ int module_finalize(const Elf_Ehdr *hdr, + orc_ip = s; + if (!strcmp(".retpoline_sites", secstrings + s->sh_name)) + retpolines = s; ++ if (!strcmp(".return_sites", secstrings + s->sh_name)) ++ returns = s; + if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) + ibt_endbr = s; + } +@@ -287,6 +289,10 @@ int module_finalize(const Elf_Ehdr *hdr, + void *rseg = (void *)retpolines->sh_addr; + apply_retpolines(rseg, rseg + retpolines->sh_size); + } ++ if (returns) { ++ void *rseg = (void *)returns->sh_addr; ++ apply_returns(rseg, rseg + returns->sh_size); ++ } + if (alt) { + /* patch .altinstructions */ + void *aseg = (void *)alt->sh_addr; +diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S +index 81aba718ecd5..ada7eb738113 100644 +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -283,6 +283,13 @@ SECTIONS + *(.retpoline_sites) + __retpoline_sites_end = .; + } ++ ++ . 
= ALIGN(8); ++ .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) { ++ __return_sites = .; ++ *(.return_sites) ++ __return_sites_end = .; ++ } + #endif + + #ifdef CONFIG_X86_KERNEL_IBT diff --git a/debian/patches/bugfix/x86/retbleed/0007-x86-objtool-Create-.return_sites.patch b/debian/patches/bugfix/x86/retbleed/0007-x86-objtool-Create-.return_sites.patch new file mode 100644 index 000000000..7014a0786 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0007-x86-objtool-Create-.return_sites.patch @@ -0,0 +1,202 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:38 +0200 +Subject: x86,objtool: Create .return_sites +Origin: https://git.kernel.org/linus/d9e9d2300681d68a775c28de6aa6e5290ae17796 + +Find all the return-thunk sites and record them in a .return_sites +section such that the kernel can undo this. + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + tools/objtool/arch/x86/decode.c | 5 ++ + tools/objtool/check.c | 74 +++++++++++++++++++++++++ + tools/objtool/include/objtool/arch.h | 1 + + tools/objtool/include/objtool/elf.h | 1 + + tools/objtool/include/objtool/objtool.h | 1 + + tools/objtool/objtool.c | 1 + + 6 files changed, 83 insertions(+) + +diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c +index 8b990a52aada..c260006106be 100644 +--- a/tools/objtool/arch/x86/decode.c ++++ b/tools/objtool/arch/x86/decode.c +@@ -787,3 +787,8 @@ bool arch_is_retpoline(struct symbol *sym) + { + return !strncmp(sym->name, "__x86_indirect_", 15); + } ++ ++bool arch_is_rethunk(struct symbol *sym) ++{ ++ return !strcmp(sym->name, "__x86_return_thunk"); ++} +diff --git a/tools/objtool/check.c b/tools/objtool/check.c +index 864bb9dd3584..f6d4ffa82432 100644 +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -749,6 +749,52 @@ static int 
create_retpoline_sites_sections(struct objtool_file *file) + return 0; + } + ++static int create_return_sites_sections(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ struct section *sec; ++ int idx; ++ ++ sec = find_section_by_name(file->elf, ".return_sites"); ++ if (sec) { ++ WARN("file already has .return_sites, skipping"); ++ return 0; ++ } ++ ++ idx = 0; ++ list_for_each_entry(insn, &file->return_thunk_list, call_node) ++ idx++; ++ ++ if (!idx) ++ return 0; ++ ++ sec = elf_create_section(file->elf, ".return_sites", 0, ++ sizeof(int), idx); ++ if (!sec) { ++ WARN("elf_create_section: .return_sites"); ++ return -1; ++ } ++ ++ idx = 0; ++ list_for_each_entry(insn, &file->return_thunk_list, call_node) { ++ ++ int *site = (int *)sec->data->d_buf + idx; ++ *site = 0; ++ ++ if (elf_add_reloc_to_insn(file->elf, sec, ++ idx * sizeof(int), ++ R_X86_64_PC32, ++ insn->sec, insn->offset)) { ++ WARN("elf_add_reloc_to_insn: .return_sites"); ++ return -1; ++ } ++ ++ idx++; ++ } ++ ++ return 0; ++} ++ + static int create_ibt_endbr_seal_sections(struct objtool_file *file) + { + struct instruction *insn; +@@ -1083,6 +1129,11 @@ __weak bool arch_is_retpoline(struct symbol *sym) + return false; + } + ++__weak bool arch_is_rethunk(struct symbol *sym) ++{ ++ return false; ++} ++ + #define NEGATIVE_RELOC ((void *)-1L) + + static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn) +@@ -1250,6 +1301,18 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in + annotate_call_site(file, insn, false); + } + ++static void add_return_call(struct objtool_file *file, struct instruction *insn) ++{ ++ /* ++ * Return thunk tail calls are really just returns in disguise, ++ * so convert them accordingly. 
++ */ ++ insn->type = INSN_RETURN; ++ insn->retpoline_safe = true; ++ ++ list_add_tail(&insn->call_node, &file->return_thunk_list); ++} ++ + static bool same_function(struct instruction *insn1, struct instruction *insn2) + { + return insn1->func->pfunc == insn2->func->pfunc; +@@ -1302,6 +1365,9 @@ static int add_jump_destinations(struct objtool_file *file) + } else if (reloc->sym->retpoline_thunk) { + add_retpoline_call(file, insn); + continue; ++ } else if (reloc->sym->return_thunk) { ++ add_return_call(file, insn); ++ continue; + } else if (insn->func) { + /* + * External sibling call or internal sibling call with +@@ -2184,6 +2250,9 @@ static int classify_symbols(struct objtool_file *file) + if (arch_is_retpoline(func)) + func->retpoline_thunk = true; + ++ if (arch_is_rethunk(func)) ++ func->return_thunk = true; ++ + if (!strcmp(func->name, "__fentry__")) + func->fentry = true; + +@@ -3972,6 +4041,11 @@ int check(struct objtool_file *file) + if (ret < 0) + goto out; + warnings += ret; ++ ++ ret = create_return_sites_sections(file); ++ if (ret < 0) ++ goto out; ++ warnings += ret; + } + + if (opts.mcount) { +diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h +index 9b19cc304195..beb2f3aa94ff 100644 +--- a/tools/objtool/include/objtool/arch.h ++++ b/tools/objtool/include/objtool/arch.h +@@ -89,6 +89,7 @@ const char *arch_ret_insn(int len); + int arch_decode_hint_reg(u8 sp_reg, int *base); + + bool arch_is_retpoline(struct symbol *sym); ++bool arch_is_rethunk(struct symbol *sym); + + int arch_rewrite_retpolines(struct objtool_file *file); + +diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h +index adebfbc2b518..16f4067b82ae 100644 +--- a/tools/objtool/include/objtool/elf.h ++++ b/tools/objtool/include/objtool/elf.h +@@ -57,6 +57,7 @@ struct symbol { + u8 uaccess_safe : 1; + u8 static_call_tramp : 1; + u8 retpoline_thunk : 1; ++ u8 return_thunk : 1; + u8 fentry : 1; + u8 profiling_func : 1; + 
struct list_head pv_target; +diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h +index a6e72d916807..7f2d1b095333 100644 +--- a/tools/objtool/include/objtool/objtool.h ++++ b/tools/objtool/include/objtool/objtool.h +@@ -24,6 +24,7 @@ struct objtool_file { + struct list_head insn_list; + DECLARE_HASHTABLE(insn_hash, 20); + struct list_head retpoline_call_list; ++ struct list_head return_thunk_list; + struct list_head static_call_list; + struct list_head mcount_loc_list; + struct list_head endbr_list; +diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c +index 512669ce064c..a7ecc32e3512 100644 +--- a/tools/objtool/objtool.c ++++ b/tools/objtool/objtool.c +@@ -102,6 +102,7 @@ struct objtool_file *objtool_open_read(const char *_objname) + INIT_LIST_HEAD(&file.insn_list); + hash_init(file.insn_hash); + INIT_LIST_HEAD(&file.retpoline_call_list); ++ INIT_LIST_HEAD(&file.return_thunk_list); + INIT_LIST_HEAD(&file.static_call_list); + INIT_LIST_HEAD(&file.mcount_loc_list); + INIT_LIST_HEAD(&file.endbr_list); diff --git a/debian/patches/bugfix/x86/retbleed/0008-x86-static_call-Use-alternative-RET-encoding.patch b/debian/patches/bugfix/x86/retbleed/0008-x86-static_call-Use-alternative-RET-encoding.patch new file mode 100644 index 000000000..c782206fd --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0008-x86-static_call-Use-alternative-RET-encoding.patch @@ -0,0 +1,199 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:39 +0200 +Subject: x86,static_call: Use alternative RET encoding +Origin: https://git.kernel.org/linus/ee88d363d15617ff50ac24fab0ffec11113b2aeb + +In addition to teaching static_call about the new way to spell 'RET', +there is an added complication in that static_call() is allowed to +rewrite text before it is known which particular spelling is required. 
+ +In order to deal with this; have a static_call specific fixup in the +apply_return() 'alternative' patching routine that will rewrite the +static_call trampoline to match the definite sequence. + +This in turn creates the problem of uniquely identifying static call +trampolines. Currently trampolines are 8 bytes, the first 5 being the +jmp.d32/ret sequence and the final 3 a byte sequence that spells out +'SCT'. + +This sequence is used in __static_call_validate() to ensure it is +patching a trampoline and not a random other jmp.d32. That is, +false-positives shouldn't be plenty, but aren't a big concern. + +OTOH the new __static_call_fixup() must not have false-positives, and +'SCT' decodes to the somewhat weird but semi plausible sequence: + + push %rbx + rex.XB push %r12 + +Additionally, there are SLS concerns with immediate jumps. Combined it +seems like a good moment to change the signature to a single 3 byte +trap instruction that is unique to this usage and will not ever get +generated by accident. + +As such, change the signature to: '0x0f, 0xb9, 0xcc', which decodes +to: + + ud1 %esp, %ecx + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/static_call.h | 19 +++++++++++++- + arch/x86/kernel/alternative.c | 12 ++++++--- + arch/x86/kernel/static_call.c | 40 ++++++++++++++++++++++++++++-- + 3 files changed, 64 insertions(+), 7 deletions(-) + +diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h +index 2d8dacd02643..70cc9ccb8029 100644 +--- a/arch/x86/include/asm/static_call.h ++++ b/arch/x86/include/asm/static_call.h +@@ -21,6 +21,16 @@ + * relative displacement across sections. 
+ */ + ++/* ++ * The trampoline is 8 bytes and of the general form: ++ * ++ * jmp.d32 \func ++ * ud1 %esp, %ecx ++ * ++ * That trailing #UD provides both a speculation stop and serves as a unique ++ * 3 byte signature identifying static call trampolines. Also see tramp_ud[] ++ * and __static_call_fixup(). ++ */ + #define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \ + asm(".pushsection .static_call.text, \"ax\" \n" \ + ".align 4 \n" \ +@@ -28,7 +38,7 @@ + STATIC_CALL_TRAMP_STR(name) ": \n" \ + ANNOTATE_NOENDBR \ + insns " \n" \ +- ".byte 0x53, 0x43, 0x54 \n" \ ++ ".byte 0x0f, 0xb9, 0xcc \n" \ + ".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \ + ".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \ + ".popsection \n") +@@ -36,8 +46,13 @@ + #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") + ++#ifdef CONFIG_RETPOLINE ++#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ ++ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") ++#else + #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop") ++#endif + + #define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \ + ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0) +@@ -48,4 +63,6 @@ + ".long " STATIC_CALL_KEY_STR(name) " - . 
\n" \ + ".popsection \n") + ++extern bool __static_call_fixup(void *tramp, u8 op, void *dest); ++ + #endif /* _ASM_STATIC_CALL_H */ +diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c +index 76b745921509..cf447ee18b3c 100644 +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -539,18 +539,22 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) + s32 *s; + + for (s = start; s < end; s++) { +- void *addr = (void *)s + *s; ++ void *dest = NULL, *addr = (void *)s + *s; + struct insn insn; + int len, ret; + u8 bytes[16]; +- u8 op1; ++ u8 op; + + ret = insn_decode_kernel(&insn, addr); + if (WARN_ON_ONCE(ret < 0)) + continue; + +- op1 = insn.opcode.bytes[0]; +- if (WARN_ON_ONCE(op1 != JMP32_INSN_OPCODE)) ++ op = insn.opcode.bytes[0]; ++ if (op == JMP32_INSN_OPCODE) ++ dest = addr + insn.length + insn.immediate.value; ++ ++ if (__static_call_fixup(addr, op, dest) || ++ WARN_ON_ONCE(dest != &__x86_return_thunk)) + continue; + + DPRINTK("return thunk at: %pS (%px) len: %d to: %pS", +diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c +index aa72cefdd5be..fe21fe778185 100644 +--- a/arch/x86/kernel/static_call.c ++++ b/arch/x86/kernel/static_call.c +@@ -11,6 +11,13 @@ enum insn_type { + RET = 3, /* tramp / site cond-tail-call */ + }; + ++/* ++ * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such ++ * that there is no false-positive trampoline identification while also being a ++ * speculation stop. 
++ */ ++static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc }; ++ + /* + * cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax + */ +@@ -43,7 +50,10 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void + break; + + case RET: +- code = &retinsn; ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) ++ code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); ++ else ++ code = &retinsn; + break; + } + +@@ -60,7 +70,7 @@ static void __static_call_validate(void *insn, bool tail, bool tramp) + { + u8 opcode = *(u8 *)insn; + +- if (tramp && memcmp(insn+5, "SCT", 3)) { ++ if (tramp && memcmp(insn+5, tramp_ud, 3)) { + pr_err("trampoline signature fail"); + BUG(); + } +@@ -115,3 +125,29 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) + mutex_unlock(&text_mutex); + } + EXPORT_SYMBOL_GPL(arch_static_call_transform); ++ ++#ifdef CONFIG_RETPOLINE ++/* ++ * This is called by apply_returns() to fix up static call trampolines, ++ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as ++ * having a return trampoline. ++ * ++ * The problem is that static_call() is available before determining ++ * X86_FEATURE_RETHUNK and, by implication, running alternatives. ++ * ++ * This means that __static_call_transform() above can have overwritten the ++ * return trampoline and we now need to fix things up to be consistent. ++ */ ++bool __static_call_fixup(void *tramp, u8 op, void *dest) ++{ ++ if (memcmp(tramp+5, tramp_ud, 3)) { ++ /* Not a trampoline site, not our problem. 
*/ ++ return false; ++ } ++ ++ if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) ++ __static_call_transform(tramp, RET, NULL); ++ ++ return true; ++} ++#endif diff --git a/debian/patches/bugfix/x86/retbleed/0009-x86-ftrace-Use-alternative-RET-encoding.patch b/debian/patches/bugfix/x86/retbleed/0009-x86-ftrace-Use-alternative-RET-encoding.patch new file mode 100644 index 000000000..43654b9b4 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0009-x86-ftrace-Use-alternative-RET-encoding.patch @@ -0,0 +1,40 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:40 +0200 +Subject: x86/ftrace: Use alternative RET encoding +Origin: https://git.kernel.org/linus/1f001e9da6bbf482311e45e48f53c2bd2179e59c + +Use the return thunk in ftrace trampolines, if needed. + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kernel/ftrace.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c +index 5b4efc927d80..24b9fa89aa27 100644 +--- a/arch/x86/kernel/ftrace.c ++++ b/arch/x86/kernel/ftrace.c +@@ -301,7 +301,7 @@ union ftrace_op_code_union { + } __attribute__((packed)); + }; + +-#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS) ++#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 
5 : 1 + IS_ENABLED(CONFIG_SLS)) + + static unsigned long + create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) +@@ -357,7 +357,10 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) + goto fail; + + ip = trampoline + size; +- memcpy(ip, retq, RET_SIZE); ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) ++ __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); ++ else ++ memcpy(ip, retq, sizeof(retq)); + + /* No need to test direct calls on created trampolines */ + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { diff --git a/debian/patches/bugfix/x86/retbleed/0010-x86-bpf-Use-alternative-RET-encoding.patch b/debian/patches/bugfix/x86/retbleed/0010-x86-bpf-Use-alternative-RET-encoding.patch new file mode 100644 index 000000000..d7eb6c411 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0010-x86-bpf-Use-alternative-RET-encoding.patch @@ -0,0 +1,59 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:41 +0200 +Subject: x86/bpf: Use alternative RET encoding +Origin: https://git.kernel.org/linus/d77cfe594ad50e0bf95d457e02ccd578791b2a15 + +Use the return thunk in eBPF generated code, if needed. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/net/bpf_jit_comp.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c +index 79d26908c143..b808c9a80d1b 100644 +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -425,6 +425,21 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) + *pprog = prog; + } + ++static void emit_return(u8 **pprog, u8 *ip) ++{ ++ u8 *prog = *pprog; ++ ++ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { ++ emit_jump(&prog, &__x86_return_thunk, ip); ++ } else { ++ EMIT1(0xC3); /* ret */ ++ if (IS_ENABLED(CONFIG_SLS)) ++ EMIT1(0xCC); /* int3 */ ++ } ++ ++ *pprog = prog; ++} ++ + /* + * Generate the following code: + * +@@ -1685,7 +1700,7 @@ st: if (is_imm8(insn->off)) + ctx->cleanup_addr = proglen; + pop_callee_regs(&prog, callee_regs_used); + EMIT1(0xC9); /* leave */ +- EMIT1(0xC3); /* ret */ ++ emit_return(&prog, image + addrs[i - 1] + (prog - temp)); + break; + + default: +@@ -2188,7 +2203,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* skip our return address and return to parent */ + EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ +- EMIT1(0xC3); /* ret */ ++ emit_return(&prog, prog); + /* Make sure the trampoline generation logic doesn't overflow */ + if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { + ret = -EFAULT; diff --git a/debian/patches/bugfix/x86/retbleed/0011-x86-kvm-Fix-SETcc-emulation-for-return-thunks.patch b/debian/patches/bugfix/x86/retbleed/0011-x86-kvm-Fix-SETcc-emulation-for-return-thunks.patch new file mode 100644 index 000000000..bf89ec7ce --- /dev/null +++ 
b/debian/patches/bugfix/x86/retbleed/0011-x86-kvm-Fix-SETcc-emulation-for-return-thunks.patch @@ -0,0 +1,89 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:42 +0200 +Subject: x86/kvm: Fix SETcc emulation for return thunks +Origin: https://git.kernel.org/linus/af2e140f34208a5dfb6b7a8ad2d56bda88f0524d + +Prepare the SETcc fastop stuff for when RET can be larger still. + +The tricky bit here is that the expressions should not only be +constant C expressions, but also absolute GAS expressions. This means +no ?: and 'true' is ~0. + +Also ensure em_setcc() has the same alignment as the actual FOP_SETCC() +ops, this ensures there cannot be an alignment hole between em_setcc() +and the first op. + +Additionally, add a .skip directive to the FOP_SETCC() macro to fill +any remaining space with INT3 traps; however the primary purpose of +this directive is to generate AS warnings when the remaining space +goes negative. Which is a very good indication the alignment magic +went side-ways. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kvm/emulate.c | 28 +++++++++++++++------------- + 1 file changed, 15 insertions(+), 13 deletions(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 89b11e7dca8a..b01437015f99 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -325,13 +325,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); + #define FOP_RET(name) \ + __FOP_RET(#name) + +-#define FOP_START(op) \ ++#define __FOP_START(op, align) \ + extern void em_##op(struct fastop *fake); \ + asm(".pushsection .text, \"ax\" \n\t" \ + ".global em_" #op " \n\t" \ +- ".align " __stringify(FASTOP_SIZE) " \n\t" \ ++ ".align " __stringify(align) " \n\t" \ + "em_" #op ":\n\t" + ++#define FOP_START(op) __FOP_START(op, FASTOP_SIZE) ++ + #define FOP_END \ + ".popsection") + +@@ -435,16 +437,15 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); + /* + * Depending on .config the SETcc functions look like: + * +- * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] +- * SETcc %al [3 bytes] +- * RET [1 byte] +- * INT3 [1 byte; CONFIG_SLS] +- * +- * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the +- * next power-of-two alignment they become 4, 8 or 16 resp. 
++ * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] ++ * SETcc %al [3 bytes] ++ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETPOLINE] ++ * INT3 [1 byte; CONFIG_SLS] + */ +-#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS)) +-#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT) ++#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \ ++ IS_ENABLED(CONFIG_SLS)) ++#define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH) ++#define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) + static_assert(SETCC_LENGTH <= SETCC_ALIGN); + + #define FOP_SETCC(op) \ +@@ -453,9 +454,10 @@ static_assert(SETCC_LENGTH <= SETCC_ALIGN); + #op ": \n\t" \ + ASM_ENDBR \ + #op " %al \n\t" \ +- __FOP_RET(#op) ++ __FOP_RET(#op) \ ++ ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t" + +-FOP_START(setcc) ++__FOP_START(setcc, SETCC_ALIGN) + FOP_SETCC(seto) + FOP_SETCC(setno) + FOP_SETCC(setc) diff --git a/debian/patches/bugfix/x86/retbleed/0012-x86-vsyscall_emu-64-Don-t-use-RET-in-vsyscall-emulat.patch b/debian/patches/bugfix/x86/retbleed/0012-x86-vsyscall_emu-64-Don-t-use-RET-in-vsyscall-emulat.patch new file mode 100644 index 000000000..eafd58377 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0012-x86-vsyscall_emu-64-Don-t-use-RET-in-vsyscall-emulat.patch @@ -0,0 +1,43 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:43 +0200 +Subject: x86/vsyscall_emu/64: Don't use RET in vsyscall emulation +Origin: https://git.kernel.org/linus/15583e514eb16744b80be85dea0774ece153177d + +This is userspace code and doesn't play by the normal kernel rules. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/entry/vsyscall/vsyscall_emu_64.S | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/entry/vsyscall/vsyscall_emu_64.S b/arch/x86/entry/vsyscall/vsyscall_emu_64.S +index 15e35159ebb6..ef2dd1827243 100644 +--- a/arch/x86/entry/vsyscall/vsyscall_emu_64.S ++++ b/arch/x86/entry/vsyscall/vsyscall_emu_64.S +@@ -19,17 +19,20 @@ __PAGE_ALIGNED_DATA + + mov $__NR_gettimeofday, %rax + syscall +- RET ++ ret ++ int3 + + .balign 1024, 0xcc + mov $__NR_time, %rax + syscall +- RET ++ ret ++ int3 + + .balign 1024, 0xcc + mov $__NR_getcpu, %rax + syscall +- RET ++ ret ++ int3 + + .balign 4096, 0xcc + diff --git a/debian/patches/bugfix/x86/retbleed/0013-x86-sev-Avoid-using-__x86_return_thunk.patch b/debian/patches/bugfix/x86/retbleed/0013-x86-sev-Avoid-using-__x86_return_thunk.patch new file mode 100644 index 000000000..508441811 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0013-x86-sev-Avoid-using-__x86_return_thunk.patch @@ -0,0 +1,43 @@ +From: Kim Phillips <kim.phillips@amd.com> +Date: Tue, 14 Jun 2022 23:15:44 +0200 +Subject: x86/sev: Avoid using __x86_return_thunk +Origin: https://git.kernel.org/linus/0ee9073000e8791f8b134a8ded31bcc767f7f232 + +Specifically, it's because __enc_copy() encrypts the kernel after +being relocated outside the kernel in sme_encrypt_execute(), and the +RET macro's jmp offset isn't amended prior to execution. 
+ +Signed-off-by: Kim Phillips <kim.phillips@amd.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/mm/mem_encrypt_boot.S | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S +index 3d1dba05fce4..d94dea450fa6 100644 +--- a/arch/x86/mm/mem_encrypt_boot.S ++++ b/arch/x86/mm/mem_encrypt_boot.S +@@ -65,7 +65,9 @@ SYM_FUNC_START(sme_encrypt_execute) + movq %rbp, %rsp /* Restore original stack pointer */ + pop %rbp + +- RET ++ /* Offset to __x86_return_thunk would be wrong here */ ++ ret ++ int3 + SYM_FUNC_END(sme_encrypt_execute) + + SYM_FUNC_START(__enc_copy) +@@ -151,6 +153,8 @@ SYM_FUNC_START(__enc_copy) + pop %r12 + pop %r15 + +- RET ++ /* Offset to __x86_return_thunk would be wrong here */ ++ ret ++ int3 + .L__enc_copy_end: + SYM_FUNC_END(__enc_copy) diff --git a/debian/patches/bugfix/x86/retbleed/0014-x86-Use-return-thunk-in-asm-code.patch b/debian/patches/bugfix/x86/retbleed/0014-x86-Use-return-thunk-in-asm-code.patch new file mode 100644 index 000000000..1c095af37 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0014-x86-Use-return-thunk-in-asm-code.patch @@ -0,0 +1,92 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:45 +0200 +Subject: x86: Use return-thunk in asm code +Origin: https://git.kernel.org/linus/aa3d480315ba6c3025a60958e1981072ea37c3df + +Use the return thunk in asm code. If the thunk isn't needed, it will +get patched into a RET instruction during boot by apply_returns(). + +Since alternatives can't handle relocations outside of the first +instruction, putting a 'jmp __x86_return_thunk' in one is not valid, +therefore carve out the memmove ERMS path into a separate label and jump +to it. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/entry/vdso/Makefile | 1 + + arch/x86/include/asm/linkage.h | 8 ++++++++ + arch/x86/lib/memmove_64.S | 7 ++++++- + 3 files changed, 15 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile +index c2a8b76ae0bc..76cd790ed0bd 100644 +--- a/arch/x86/entry/vdso/Makefile ++++ b/arch/x86/entry/vdso/Makefile +@@ -92,6 +92,7 @@ endif + endif + + $(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) ++$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO + + # + # vDSO code runs in userspace and -pg doesn't help with profiling anyway. +diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h +index 85865f1645bd..e3ae331cabb1 100644 +--- a/arch/x86/include/asm/linkage.h ++++ b/arch/x86/include/asm/linkage.h +@@ -19,19 +19,27 @@ + #define __ALIGN_STR __stringify(__ALIGN) + #endif + ++#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#define RET jmp __x86_return_thunk ++#else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS + #define RET ret; int3 + #else + #define RET ret + #endif ++#endif /* CONFIG_RETPOLINE */ + + #else /* __ASSEMBLY__ */ + ++#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#define ASM_RET "jmp __x86_return_thunk\n\t" ++#else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS + #define ASM_RET "ret; int3\n\t" + #else + #define ASM_RET "ret\n\t" + #endif ++#endif /* CONFIG_RETPOLINE */ + + #endif /* __ASSEMBLY__ */ + +diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S +index d83cba364e31..724bbf83eb5b 100644 +--- a/arch/x86/lib/memmove_64.S ++++ b/arch/x86/lib/memmove_64.S +@@ -39,7 +39,7 @@ SYM_FUNC_START(__memmove) + /* 
FSRM implies ERMS => no length checks, do the copy directly */ + .Lmemmove_begin_forward: + ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM +- ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS ++ ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS + + /* + * movsq instruction have many startup latency +@@ -205,6 +205,11 @@ SYM_FUNC_START(__memmove) + movb %r11b, (%rdi) + 13: + RET ++ ++.Lmemmove_erms: ++ movq %rdx, %rcx ++ rep movsb ++ RET + SYM_FUNC_END(__memmove) + EXPORT_SYMBOL(__memmove) + diff --git a/debian/patches/bugfix/x86/retbleed/0015-x86-entry-Avoid-very-early-RET.patch b/debian/patches/bugfix/x86/retbleed/0015-x86-entry-Avoid-very-early-RET.patch new file mode 100644 index 000000000..71935dd77 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0015-x86-entry-Avoid-very-early-RET.patch @@ -0,0 +1,59 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:46 +0200 +Subject: x86/entry: Avoid very early RET +Origin: https://git.kernel.org/linus/7c81c0c9210c9bfab2bae76aab2999de5bad27db + +Commit + + ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()") + +manages to introduce a CALL/RET pair that is before SWITCH_TO_KERNEL_CR3, +which means it is before RETBleed can be mitigated. + +Revert to an earlier version of the commit in Fixes. Down side is that +this will bloat .text size somewhat. The alternative is fully reverting +it. + +The purpose of this patch was to allow migrating error_entry() to C, +including the whole of kPTI. Much care needs to be taken moving that +forward to not re-introduce this problem of early RETs. 
+ +Fixes: ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()") +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/entry/entry_64.S | 12 ++---------- + 1 file changed, 2 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 4300ba49b5ee..9f5d6a4f293e 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -322,14 +322,6 @@ SYM_CODE_END(ret_from_fork) + #endif + .endm + +-/* Save all registers in pt_regs */ +-SYM_CODE_START_LOCAL(push_and_clear_regs) +- UNWIND_HINT_FUNC +- PUSH_AND_CLEAR_REGS save_ret=1 +- ENCODE_FRAME_POINTER 8 +- RET +-SYM_CODE_END(push_and_clear_regs) +- + /** + * idtentry_body - Macro to emit code calling the C function + * @cfunc: C function to be called +@@ -337,8 +329,8 @@ SYM_CODE_END(push_and_clear_regs) + */ + .macro idtentry_body cfunc has_error_code:req + +- call push_and_clear_regs +- UNWIND_HINT_REGS ++ PUSH_AND_CLEAR_REGS ++ ENCODE_FRAME_POINTER + + /* + * Call error_entry() and switch to the task stack if from userspace. diff --git a/debian/patches/bugfix/x86/retbleed/0016-objtool-Treat-.text.__x86.-as-noinstr.patch b/debian/patches/bugfix/x86/retbleed/0016-objtool-Treat-.text.__x86.-as-noinstr.patch new file mode 100644 index 000000000..60f2c0718 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0016-objtool-Treat-.text.__x86.-as-noinstr.patch @@ -0,0 +1,32 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:47 +0200 +Subject: objtool: Treat .text.__x86.* as noinstr +Origin: https://git.kernel.org/linus/951ddecf435659553ed15a9214e153a3af43a9a1 + +Needed because zen_untrain_ret() will be called from noinstr code. + +Also makes sense since the thunks MUST NOT contain instrumentation nor +be poked with dynamic instrumentation. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + tools/objtool/check.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tools/objtool/check.c b/tools/objtool/check.c +index f6d4ffa82432..b98fd68013c3 100644 +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -376,7 +376,8 @@ static int decode_instructions(struct objtool_file *file) + sec->text = true; + + if (!strcmp(sec->name, ".noinstr.text") || +- !strcmp(sec->name, ".entry.text")) ++ !strcmp(sec->name, ".entry.text") || ++ !strncmp(sec->name, ".text.__x86.", 12)) + sec->noinstr = true; + + for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) { diff --git a/debian/patches/bugfix/x86/retbleed/0017-x86-Add-magic-AMD-return-thunk.patch b/debian/patches/bugfix/x86/retbleed/0017-x86-Add-magic-AMD-return-thunk.patch new file mode 100644 index 000000000..b89380a51 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0017-x86-Add-magic-AMD-return-thunk.patch @@ -0,0 +1,365 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:48 +0200 +Subject: x86: Add magic AMD return-thunk +Origin: https://git.kernel.org/linus/a149180fbcf336e97ce4eb2cdc13672727feb94d + +Note: needs to be in a section distinct from Retpolines such that the +Retpoline RET substitution cannot possibly use immediate jumps. + +ORC unwinding for zen_untrain_ret() and __x86_return_thunk() is a +little tricky but works due to the fact that zen_untrain_ret() doesn't +have any stack ops and as such will emit a single ORC entry at the +start (+0x3f). + +Meanwhile, unwinding an IP, including the __x86_return_thunk() one +(+0x40) will search for the largest ORC entry smaller or equal to the +IP, these will find the one ORC entry (+0x3f) and all works. + + [ Alexandre: SVM part. ] + [ bp: Build fix, massages. 
] + +Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/entry/entry_64.S | 6 +++ + arch/x86/entry/entry_64_compat.S | 4 ++ + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/disabled-features.h | 3 +- + arch/x86/include/asm/nospec-branch.h | 17 +++++++ + arch/x86/kernel/vmlinux.lds.S | 2 +- + arch/x86/kvm/svm/vmenter.S | 18 +++++++ + arch/x86/lib/retpoline.S | 64 ++++++++++++++++++++++-- + tools/objtool/check.c | 22 ++++++-- + 9 files changed, 128 insertions(+), 9 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 9f5d6a4f293e..1f4b18c8909b 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -96,6 +96,7 @@ SYM_CODE_START(entry_SYSCALL_64) + + SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR ++ UNTRAIN_RET + + /* Construct struct pt_regs on stack */ + pushq $__USER_DS /* pt_regs->ss */ +@@ -717,6 +718,7 @@ SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL) + pushq %rdi /* Stash user RDI */ + swapgs /* to kernel GS */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ ++ UNTRAIN_RET + + movq PER_CPU_VAR(espfix_waddr), %rdi + movq %rax, (0*8)(%rdi) /* user RAX */ +@@ -911,6 +913,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) + * be retrieved from a kernel internal table. + */ + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 ++ UNTRAIN_RET + + /* + * Handling GSBASE depends on the availability of FSGSBASE. +@@ -1020,6 +1023,7 @@ SYM_CODE_START_LOCAL(error_entry) + FENCE_SWAPGS_USER_ENTRY + /* We have user CR3. Change to kernel CR3. 
*/ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ UNTRAIN_RET + + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ + .Lerror_entry_from_usermode_after_swapgs: +@@ -1072,6 +1076,7 @@ SYM_CODE_START_LOCAL(error_entry) + swapgs + FENCE_SWAPGS_USER_ENTRY + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ UNTRAIN_RET + + /* + * Pretend that the exception came from user mode: set up pt_regs +@@ -1167,6 +1172,7 @@ SYM_CODE_START(asm_exc_nmi) + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT_IRET_REGS base=%rdx offset=8 ++ UNTRAIN_RET + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index d1052742ad0c..03d74c5153fb 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -14,6 +14,7 @@ + #include <asm/irqflags.h> + #include <asm/asm.h> + #include <asm/smap.h> ++#include <asm/nospec-branch.h> + #include <linux/linkage.h> + #include <linux/err.h> + +@@ -72,6 +73,7 @@ SYM_CODE_START(entry_SYSENTER_compat) + pushq $__USER32_CS /* pt_regs->cs */ + pushq $0 /* pt_regs->ip = 0 (placeholder) */ + SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) ++ UNTRAIN_RET + + /* + * User tracing code (ptrace or signal handlers) might assume that +@@ -190,6 +192,7 @@ SYM_CODE_START(entry_SYSCALL_compat) + + SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR ++ UNTRAIN_RET + + /* Construct struct pt_regs on stack */ + pushq $__USER32_DS /* pt_regs->ss */ +@@ -332,6 +335,7 @@ SYM_CODE_START(entry_INT80_compat) + pushq 0*8(%rax) /* regs->orig_ax */ + .Lint80_keep_stack: + ++ UNTRAIN_RET + PUSH_AND_CLEAR_REGS rax=$-ENOSYS + UNWIND_HINT_REGS + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 295e69090fb8..fa3d0db1470e 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ 
-301,6 +301,7 @@ + #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ + #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ ++#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ + + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ + #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ +diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h +index 641c479cca17..db75da511a36 100644 +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -55,7 +55,8 @@ + #else + # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ + (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ +- (1 << (X86_FEATURE_RETHUNK & 31))) ++ (1 << (X86_FEATURE_RETHUNK & 31)) | \ ++ (1 << (X86_FEATURE_UNRET & 31))) + #endif + + #ifdef CONFIG_INTEL_IOMMU_SVM +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 829c9f827a96..5ca60ae0d14f 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -112,6 +112,22 @@ + #endif + .endm + ++/* ++ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the ++ * return thunk isn't mapped into the userspace tables (then again, AMD ++ * typically has NO_MELTDOWN). ++ * ++ * Doesn't clobber any registers but does require a stable stack. ++ * ++ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point ++ * where we have a stack but before any RET instruction. 
++ */ ++.macro UNTRAIN_RET ++#ifdef CONFIG_RETPOLINE ++ ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET ++#endif ++.endm ++ + #else /* __ASSEMBLY__ */ + + #define ANNOTATE_RETPOLINE_SAFE \ +@@ -124,6 +140,7 @@ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; + extern retpoline_thunk_t __x86_indirect_thunk_array[]; + + extern void __x86_return_thunk(void); ++extern void zen_untrain_ret(void); + + #ifdef CONFIG_RETPOLINE + +diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S +index ada7eb738113..c375420036fb 100644 +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -141,7 +141,7 @@ SECTIONS + + #ifdef CONFIG_RETPOLINE + __indirect_thunk_start = .; +- *(.text.__x86.indirect_thunk) ++ *(.text.__x86.*) + __indirect_thunk_end = .; + #endif + } :text =0xcccc +diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S +index dfaeb47fcf2a..723f8534986c 100644 +--- a/arch/x86/kvm/svm/vmenter.S ++++ b/arch/x86/kvm/svm/vmenter.S +@@ -110,6 +110,15 @@ SYM_FUNC_START(__svm_vcpu_run) + mov %r15, VCPU_R15(%_ASM_AX) + #endif + ++ /* ++ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be ++ * untrained as soon as we exit the VM and are back to the ++ * kernel. This should be done before re-enabling interrupts ++ * because interrupt handlers won't sanitize 'ret' if the return is ++ * from the kernel. ++ */ ++ UNTRAIN_RET ++ + /* + * Clear all general purpose registers except RSP and RAX to prevent + * speculative use of the guest's values, even those that are reloaded +@@ -190,6 +199,15 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) + FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + #endif + ++ /* ++ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be ++ * untrained as soon as we exit the VM and are back to the ++ * kernel. This should be done before re-enabling interrupts ++ * because interrupt handlers won't sanitize RET if the return is ++ * from the kernel. 
++ */ ++ UNTRAIN_RET ++ + pop %_ASM_BX + + #ifdef CONFIG_X86_64 +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +index 4467c21215f4..fdd16163b996 100644 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -72,11 +72,67 @@ SYM_CODE_END(__x86_indirect_thunk_array) + * This function name is magical and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + */ +-SYM_CODE_START(__x86_return_thunk) +- UNWIND_HINT_EMPTY +- ANNOTATE_NOENDBR ++ .section .text.__x86.return_thunk ++ ++/* ++ * Safety details here pertain to the AMD Zen{1,2} microarchitecture: ++ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for ++ * alignment within the BTB. ++ * 2) The instruction at zen_untrain_ret must contain, and not ++ * end with, the 0xc3 byte of the RET. ++ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread ++ * from re-poisioning the BTB prediction. ++ */ ++ .align 64 ++ .skip 63, 0xcc ++SYM_FUNC_START_NOALIGN(zen_untrain_ret); ++ ++ /* ++ * As executed from zen_untrain_ret, this is: ++ * ++ * TEST $0xcc, %bl ++ * LFENCE ++ * JMP __x86_return_thunk ++ * ++ * Executing the TEST instruction has a side effect of evicting any BTB ++ * prediction (potentially attacker controlled) attached to the RET, as ++ * __x86_return_thunk + 1 isn't an instruction boundary at the moment. ++ */ ++ .byte 0xf6 ++ ++ /* ++ * As executed from __x86_return_thunk, this is a plain RET. ++ * ++ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. ++ * ++ * We subsequently jump backwards and architecturally execute the RET. ++ * This creates a correct BTB prediction (type=ret), but in the ++ * meantime we suffer Straight Line Speculation (because the type was ++ * no branch) which is halted by the INT3. ++ * ++ * With SMT enabled and STIBP active, a sibling thread cannot poison ++ * RET's prediction to a type of its choice, but can evict the ++ * prediction due to competitive sharing. 
If the prediction is ++ * evicted, __x86_return_thunk will suffer Straight Line Speculation ++ * which will be contained safely by the INT3. ++ */ ++SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) + ret + int3 + SYM_CODE_END(__x86_return_thunk) + +-__EXPORT_THUNK(__x86_return_thunk) ++ /* ++ * Ensure the TEST decoding / BTB invalidation is complete. ++ */ ++ lfence ++ ++ /* ++ * Jump back and execute the RET in the middle of the TEST instruction. ++ * INT3 is for SLS protection. ++ */ ++ jmp __x86_return_thunk ++ int3 ++SYM_FUNC_END(zen_untrain_ret) ++__EXPORT_THUNK(zen_untrain_ret) ++ ++EXPORT_SYMBOL(__x86_return_thunk) +diff --git a/tools/objtool/check.c b/tools/objtool/check.c +index b98fd68013c3..4252cd05dfc4 100644 +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1302,7 +1302,7 @@ static void add_retpoline_call(struct objtool_file *file, struct instruction *in + annotate_call_site(file, insn, false); + } + +-static void add_return_call(struct objtool_file *file, struct instruction *insn) ++static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add) + { + /* + * Return thunk tail calls are really just returns in disguise, +@@ -1311,7 +1311,8 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn) + insn->type = INSN_RETURN; + insn->retpoline_safe = true; + +- list_add_tail(&insn->call_node, &file->return_thunk_list); ++ if (add) ++ list_add_tail(&insn->call_node, &file->return_thunk_list); + } + + static bool same_function(struct instruction *insn1, struct instruction *insn2) +@@ -1367,7 +1368,7 @@ static int add_jump_destinations(struct objtool_file *file) + add_retpoline_call(file, insn); + continue; + } else if (reloc->sym->return_thunk) { +- add_return_call(file, insn); ++ add_return_call(file, insn, true); + continue; + } else if (insn->func) { + /* +@@ -1387,6 +1388,21 @@ static int add_jump_destinations(struct objtool_file *file) + + jump_dest = find_insn(file, dest_sec, 
dest_off); + if (!jump_dest) { ++ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off); ++ ++ /* ++ * This is a special case for zen_untrain_ret(). ++ * It jumps to __x86_return_thunk(), but objtool ++ * can't find the thunk's starting RET ++ * instruction, because the RET is also in the ++ * middle of another instruction. Objtool only ++ * knows about the outer instruction. ++ */ ++ if (sym && sym->return_thunk) { ++ add_return_call(file, insn, false); ++ continue; ++ } ++ + WARN_FUNC("can't find jump dest instruction at %s+0x%lx", + insn->sec, insn->offset, dest_sec->name, + dest_off); diff --git a/debian/patches/bugfix/x86/retbleed/0018-x86-bugs-Report-AMD-retbleed-vulnerability.patch b/debian/patches/bugfix/x86/retbleed/0018-x86-bugs-Report-AMD-retbleed-vulnerability.patch new file mode 100644 index 000000000..3bc302209 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0018-x86-bugs-Report-AMD-retbleed-vulnerability.patch @@ -0,0 +1,173 @@ +From: Alexandre Chartre <alexandre.chartre@oracle.com> +Date: Tue, 14 Jun 2022 23:15:49 +0200 +Subject: x86/bugs: Report AMD retbleed vulnerability +Origin: https://git.kernel.org/linus/6b80b59b3555706508008f1f127b5412c89c7fd8 + +Report that AMD x86 CPUs are vulnerable to the RETBleed (Arbitrary +Speculative Code Execution with Return Instructions) attack. 
+ + [peterz: add hygon] + [kim: invert parity; fam15h] + +Co-developed-by: Kim Phillips <kim.phillips@amd.com> +Signed-off-by: Kim Phillips <kim.phillips@amd.com> +Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/bugs.c | 13 +++++++++++++ + arch/x86/kernel/cpu/common.c | 19 +++++++++++++++++++ + drivers/base/cpu.c | 8 ++++++++ + include/linux/cpu.h | 2 ++ + 5 files changed, 43 insertions(+) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index fa3d0db1470e..c16503ca3b75 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -453,5 +453,6 @@ + #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ + #define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ + #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ ++#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 74c62cc47a5f..74fdd21e416b 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1994,6 +1994,11 @@ static ssize_t srbds_show_state(char *buf) + return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); + } + ++static ssize_t retbleed_show_state(char *buf) ++{ ++ return sprintf(buf, "Vulnerable\n"); ++} ++ + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) + { +@@ -2039,6 +2044,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr + case X86_BUG_MMIO_STALE_DATA: 
+ return mmio_stale_data_show_state(buf); + ++ case X86_BUG_RETBLEED: ++ return retbleed_show_state(buf); ++ + default: + break; + } +@@ -2095,4 +2103,9 @@ ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *at + { + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); + } ++ ++ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) ++{ ++ return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); ++} + #endif +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 4730b0a58f24..4089c173c6ae 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1205,16 +1205,27 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { + {} + }; + ++#define VULNBL(vendor, family, model, blacklist) \ ++ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist) ++ + #define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \ + INTEL_FAM6_##model, steppings, \ + X86_FEATURE_ANY, issues) + ++#define VULNBL_AMD(family, blacklist) \ ++ VULNBL(AMD, family, X86_MODEL_ANY, blacklist) ++ ++#define VULNBL_HYGON(family, blacklist) \ ++ VULNBL(HYGON, family, X86_MODEL_ANY, blacklist) ++ + #define SRBDS BIT(0) + /* CPU is affected by X86_BUG_MMIO_STALE_DATA */ + #define MMIO BIT(1) + /* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */ + #define MMIO_SBDS BIT(2) ++/* CPU is affected by RETbleed, speculating where you would not expect it */ ++#define RETBLEED BIT(3) + + static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { + VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), +@@ -1247,6 +1258,11 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + 
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), ++ ++ VULNBL_AMD(0x15, RETBLEED), ++ VULNBL_AMD(0x16, RETBLEED), ++ VULNBL_AMD(0x17, RETBLEED), ++ VULNBL_HYGON(0x18, RETBLEED), + {} + }; + +@@ -1348,6 +1364,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + ++ if (cpu_matches(cpu_vuln_blacklist, RETBLEED)) ++ setup_force_cpu_bug(X86_BUG_RETBLEED); ++ + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) + return; + +diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c +index a97776ea9d99..4c98849577d4 100644 +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@ -570,6 +570,12 @@ ssize_t __weak cpu_show_mmio_stale_data(struct device *dev, + return sysfs_emit(buf, "Not affected\n"); + } + ++ssize_t __weak cpu_show_retbleed(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ return sysfs_emit(buf, "Not affected\n"); ++} ++ + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); + static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); + static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); +@@ -580,6 +586,7 @@ static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); + static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); + static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL); + static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL); ++static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL); + + static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, +@@ -592,6 +599,7 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_itlb_multihit.attr, + &dev_attr_srbds.attr, + &dev_attr_mmio_stale_data.attr, ++ &dev_attr_retbleed.attr, + NULL + }; + +diff --git a/include/linux/cpu.h b/include/linux/cpu.h +index 2c7477354744..314802f98b9d 100644 +--- a/include/linux/cpu.h ++++ 
b/include/linux/cpu.h +@@ -68,6 +68,8 @@ extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, + extern ssize_t cpu_show_mmio_stale_data(struct device *dev, + struct device_attribute *attr, + char *buf); ++extern ssize_t cpu_show_retbleed(struct device *dev, ++ struct device_attribute *attr, char *buf); + + extern __printf(4, 5) + struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/debian/patches/bugfix/x86/retbleed/0019-x86-bugs-Add-AMD-retbleed-boot-parameter.patch b/debian/patches/bugfix/x86/retbleed/0019-x86-bugs-Add-AMD-retbleed-boot-parameter.patch new file mode 100644 index 000000000..0ee14f2d0 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0019-x86-bugs-Add-AMD-retbleed-boot-parameter.patch @@ -0,0 +1,207 @@ +From: Alexandre Chartre <alexandre.chartre@oracle.com> +Date: Tue, 14 Jun 2022 23:15:50 +0200 +Subject: x86/bugs: Add AMD retbleed= boot parameter +Origin: https://git.kernel.org/linus/7fbf47c7ce50b38a64576b150e7011ae73d54669 + +Add the "retbleed=<value>" boot parameter to select a mitigation for +RETBleed. Possible values are "off", "auto" and "unret" +(JMP2RET mitigation). The default value is "auto". + +Currently, "retbleed=auto" will select the unret mitigation on +AMD and Hygon and no mitigation on Intel (JMP2RET is not effective on +Intel). 
+ + [peterz: rebase; add hygon] + [jpoimboe: cleanups] + +Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + .../admin-guide/kernel-parameters.txt | 15 +++ + arch/x86/Kconfig | 3 + + arch/x86/kernel/cpu/bugs.c | 108 +++++++++++++++++- + 3 files changed, 125 insertions(+), 1 deletion(-) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 2522b11e593f..529613367e7d 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5197,6 +5197,21 @@ + + retain_initrd [RAM] Keep initrd memory after extraction + ++ retbleed= [X86] Control mitigation of RETBleed (Arbitrary ++ Speculative Code Execution with Return Instructions) ++ vulnerability. ++ ++ off - unconditionally disable ++ auto - automatically select a migitation ++ unret - force enable untrained return thunks, ++ only effective on AMD Zen {1,2} ++ based systems. ++ ++ Selecting 'auto' will choose a mitigation method at run ++ time according to the CPU. ++ ++ Not specifying this option is equivalent to retbleed=auto. ++ + rfkill.default_state= + 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm, + etc. communication is blocked by default. 
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index be0b95e51df6..e35eecfb74f2 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -475,6 +475,9 @@ config RETPOLINE + config CC_HAS_SLS + def_bool $(cc-option,-mharden-sls=all) + ++config CC_HAS_RETURN_THUNK ++ def_bool $(cc-option,-mfunction-return=thunk-extern) ++ + config SLS + bool "Mitigate Straight-Line-Speculation" + depends on CC_HAS_SLS && X86_64 +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 74fdd21e416b..bdeb732ff2d9 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -37,6 +37,7 @@ + #include "cpu.h" + + static void __init spectre_v1_select_mitigation(void); ++static void __init retbleed_select_mitigation(void); + static void __init spectre_v2_select_mitigation(void); + static void __init ssb_select_mitigation(void); + static void __init l1tf_select_mitigation(void); +@@ -120,6 +121,12 @@ void __init check_bugs(void) + + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); ++ retbleed_select_mitigation(); ++ /* ++ * spectre_v2_select_mitigation() relies on the state set by ++ * retbleed_select_mitigation(); specifically the STIBP selection is ++ * forced for UNRET. 
++ */ + spectre_v2_select_mitigation(); + ssb_select_mitigation(); + l1tf_select_mitigation(); +@@ -752,6 +759,100 @@ static int __init nospectre_v1_cmdline(char *str) + } + early_param("nospectre_v1", nospectre_v1_cmdline); + ++#undef pr_fmt ++#define pr_fmt(fmt) "RETBleed: " fmt ++ ++enum retbleed_mitigation { ++ RETBLEED_MITIGATION_NONE, ++ RETBLEED_MITIGATION_UNRET, ++}; ++ ++enum retbleed_mitigation_cmd { ++ RETBLEED_CMD_OFF, ++ RETBLEED_CMD_AUTO, ++ RETBLEED_CMD_UNRET, ++}; ++ ++const char * const retbleed_strings[] = { ++ [RETBLEED_MITIGATION_NONE] = "Vulnerable", ++ [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", ++}; ++ ++static enum retbleed_mitigation retbleed_mitigation __ro_after_init = ++ RETBLEED_MITIGATION_NONE; ++static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = ++ RETBLEED_CMD_AUTO; ++ ++static int __init retbleed_parse_cmdline(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ ++ if (!strcmp(str, "off")) ++ retbleed_cmd = RETBLEED_CMD_OFF; ++ else if (!strcmp(str, "auto")) ++ retbleed_cmd = RETBLEED_CMD_AUTO; ++ else if (!strcmp(str, "unret")) ++ retbleed_cmd = RETBLEED_CMD_UNRET; ++ else ++ pr_err("Unknown retbleed option (%s). 
Defaulting to 'auto'\n", str); ++ ++ return 0; ++} ++early_param("retbleed", retbleed_parse_cmdline); ++ ++#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" ++#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" ++ ++static void __init retbleed_select_mitigation(void) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) ++ return; ++ ++ switch (retbleed_cmd) { ++ case RETBLEED_CMD_OFF: ++ return; ++ ++ case RETBLEED_CMD_UNRET: ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ break; ++ ++ case RETBLEED_CMD_AUTO: ++ default: ++ if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) ++ break; ++ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ break; ++ } ++ ++ switch (retbleed_mitigation) { ++ case RETBLEED_MITIGATION_UNRET: ++ ++ if (!IS_ENABLED(CONFIG_RETPOLINE) || ++ !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { ++ pr_err(RETBLEED_COMPILER_MSG); ++ retbleed_mitigation = RETBLEED_MITIGATION_NONE; ++ break; ++ } ++ ++ setup_force_cpu_cap(X86_FEATURE_RETHUNK); ++ setup_force_cpu_cap(X86_FEATURE_UNRET); ++ ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && ++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) ++ pr_err(RETBLEED_UNTRAIN_MSG); ++ break; ++ ++ default: ++ break; ++ } ++ ++ pr_info("%s\n", retbleed_strings[retbleed_mitigation]); ++} ++ + #undef pr_fmt + #define pr_fmt(fmt) "Spectre V2 : " fmt + +@@ -1996,7 +2097,12 @@ static ssize_t srbds_show_state(char *buf) + + static ssize_t retbleed_show_state(char *buf) + { +- return sprintf(buf, "Vulnerable\n"); ++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET && ++ (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && ++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)) ++ return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); ++ ++ return sprintf(buf, 
"%s\n", retbleed_strings[retbleed_mitigation]); + } + + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, diff --git a/debian/patches/bugfix/x86/retbleed/0020-x86-bugs-Enable-STIBP-for-JMP2RET.patch b/debian/patches/bugfix/x86/retbleed/0020-x86-bugs-Enable-STIBP-for-JMP2RET.patch new file mode 100644 index 000000000..cc1555778 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0020-x86-bugs-Enable-STIBP-for-JMP2RET.patch @@ -0,0 +1,140 @@ +From: Kim Phillips <kim.phillips@amd.com> +Date: Tue, 14 Jun 2022 23:15:51 +0200 +Subject: x86/bugs: Enable STIBP for JMP2RET +Origin: https://git.kernel.org/linus/e8ec1b6e08a2102d8755ccb06fa26d540f26a2fa + +For untrained return thunks to be fully effective, STIBP must be enabled +or SMT disabled. + +Co-developed-by: Josh Poimboeuf <jpoimboe@redhat.com> +Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> +Signed-off-by: Kim Phillips <kim.phillips@amd.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + .../admin-guide/kernel-parameters.txt | 16 +++-- + arch/x86/kernel/cpu/bugs.c | 58 +++++++++++++++---- + 2 files changed, 57 insertions(+), 17 deletions(-) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 529613367e7d..168e164f09eb 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5201,11 +5201,17 @@ + Speculative Code Execution with Return Instructions) + vulnerability. + +- off - unconditionally disable +- auto - automatically select a migitation +- unret - force enable untrained return thunks, +- only effective on AMD Zen {1,2} +- based systems. ++ off - no mitigation ++ auto - automatically select a migitation ++ auto,nosmt - automatically select a mitigation, ++ disabling SMT if necessary for ++ the full mitigation (only on Zen1 ++ and older without STIBP). 
++ unret - force enable untrained return thunks, ++ only effective on AMD f15h-f17h ++ based systems. ++ unret,nosmt - like unret, will disable SMT when STIBP ++ is not available. + + Selecting 'auto' will choose a mitigation method at run + time according to the CPU. +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index bdeb732ff2d9..fb249b2c1eb0 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -783,19 +783,34 @@ static enum retbleed_mitigation retbleed_mitigation __ro_after_init = + static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init = + RETBLEED_CMD_AUTO; + ++static int __ro_after_init retbleed_nosmt = false; ++ + static int __init retbleed_parse_cmdline(char *str) + { + if (!str) + return -EINVAL; + +- if (!strcmp(str, "off")) +- retbleed_cmd = RETBLEED_CMD_OFF; +- else if (!strcmp(str, "auto")) +- retbleed_cmd = RETBLEED_CMD_AUTO; +- else if (!strcmp(str, "unret")) +- retbleed_cmd = RETBLEED_CMD_UNRET; +- else +- pr_err("Unknown retbleed option (%s). 
Defaulting to 'auto'\n", str); ++ while (str) { ++ char *next = strchr(str, ','); ++ if (next) { ++ *next = 0; ++ next++; ++ } ++ ++ if (!strcmp(str, "off")) { ++ retbleed_cmd = RETBLEED_CMD_OFF; ++ } else if (!strcmp(str, "auto")) { ++ retbleed_cmd = RETBLEED_CMD_AUTO; ++ } else if (!strcmp(str, "unret")) { ++ retbleed_cmd = RETBLEED_CMD_UNRET; ++ } else if (!strcmp(str, "nosmt")) { ++ retbleed_nosmt = true; ++ } else { ++ pr_err("Ignoring unknown retbleed option (%s).", str); ++ } ++ ++ str = next; ++ } + + return 0; + } +@@ -841,6 +856,10 @@ static void __init retbleed_select_mitigation(void) + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + ++ if (!boot_cpu_has(X86_FEATURE_STIBP) && ++ (retbleed_nosmt || cpu_mitigations_auto_nosmt())) ++ cpu_smt_disable(false); ++ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + pr_err(RETBLEED_UNTRAIN_MSG); +@@ -1087,6 +1106,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) + boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) + mode = SPECTRE_V2_USER_STRICT_PREFERRED; + ++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { ++ if (mode != SPECTRE_V2_USER_STRICT && ++ mode != SPECTRE_V2_USER_STRICT_PREFERRED) ++ pr_info("Selecting STIBP always-on mode to complement retbleed mitigation'\n"); ++ mode = SPECTRE_V2_USER_STRICT_PREFERRED; ++ } ++ + spectre_v2_user_stibp = mode; + + set_mode: +@@ -2097,10 +2123,18 @@ static ssize_t srbds_show_state(char *buf) + + static ssize_t retbleed_show_state(char *buf) + { +- if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET && +- (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && +- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)) +- return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); ++ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && ++ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) ++ return 
sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); ++ ++ return sprintf(buf, "%s; SMT %s\n", ++ retbleed_strings[retbleed_mitigation], ++ !sched_smt_active() ? "disabled" : ++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || ++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ? ++ "enabled with STIBP protection" : "vulnerable"); ++ } + + return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]); + } diff --git a/debian/patches/bugfix/x86/retbleed/0021-x86-bugs-Keep-a-per-CPU-IA32_SPEC_CTRL-value.patch b/debian/patches/bugfix/x86/retbleed/0021-x86-bugs-Keep-a-per-CPU-IA32_SPEC_CTRL-value.patch new file mode 100644 index 000000000..6082362e5 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0021-x86-bugs-Keep-a-per-CPU-IA32_SPEC_CTRL-value.patch @@ -0,0 +1,117 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:52 +0200 +Subject: x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value +Origin: https://git.kernel.org/linus/caa0ff24d5d0e02abce5e65c3d2b7f20a6617be5 + +Due to TIF_SSBD and TIF_SPEC_IB the actual IA32_SPEC_CTRL value can +differ from x86_spec_ctrl_base. As such, keep a per-CPU value +reflecting the current task's MSR content. 
+ + [jpoimboe: rename] + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/nospec-branch.h | 1 + + arch/x86/kernel/cpu/bugs.c | 28 +++++++++++++++++++++++----- + arch/x86/kernel/process.c | 2 +- + 3 files changed, 25 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 5ca60ae0d14f..bac243da5130 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -253,6 +253,7 @@ static inline void indirect_branch_prediction_barrier(void) + + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; ++extern void write_spec_ctrl_current(u64 val); + + /* + * With retpoline, we must use IBRS to restrict branch prediction +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index fb249b2c1eb0..ce7435593c3e 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -49,11 +49,29 @@ static void __init mmio_select_mitigation(void); + static void __init srbds_select_mitigation(void); + static void __init l1d_flush_select_mitigation(void); + +-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */ ++/* The base value of the SPEC_CTRL MSR without task-specific bits set */ + u64 x86_spec_ctrl_base; + EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); ++ ++/* The current value of the SPEC_CTRL MSR with task-specific bits set */ ++DEFINE_PER_CPU(u64, x86_spec_ctrl_current); ++EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); ++ + static DEFINE_MUTEX(spec_ctrl_mutex); + ++/* ++ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ ++ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). 
++ */ ++void write_spec_ctrl_current(u64 val) ++{ ++ if (this_cpu_read(x86_spec_ctrl_current) == val) ++ return; ++ ++ this_cpu_write(x86_spec_ctrl_current, val); ++ wrmsrl(MSR_IA32_SPEC_CTRL, val); ++} ++ + /* + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. +@@ -1279,7 +1297,7 @@ static void __init spectre_v2_select_mitigation(void) + if (spectre_v2_in_eibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base); + } + + switch (mode) { +@@ -1334,7 +1352,7 @@ static void __init spectre_v2_select_mitigation(void) + + static void update_stibp_msr(void * __unused) + { +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base); + } + + /* Update x86_spec_ctrl_base in case SMT state changed. */ +@@ -1577,7 +1595,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) + x86_amd_ssb_disable(); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base); + } + } + +@@ -1828,7 +1846,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) + void x86_spec_ctrl_setup_ap(void) + { + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) +- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_ssb_disable(); +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 9b2772b7e1f3..05611f66d013 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, + } + + if (updmsr) +- wrmsrl(MSR_IA32_SPEC_CTRL, msr); ++ write_spec_ctrl_current(msr); + } + + static unsigned long speculation_ctrl_update_tif(struct 
task_struct *tsk) diff --git a/debian/patches/bugfix/x86/retbleed/0022-x86-entry-Add-kernel-IBRS-implementation.patch b/debian/patches/bugfix/x86/retbleed/0022-x86-entry-Add-kernel-IBRS-implementation.patch new file mode 100644 index 000000000..14731c189 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0022-x86-entry-Add-kernel-IBRS-implementation.patch @@ -0,0 +1,351 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:53 +0200 +Subject: x86/entry: Add kernel IBRS implementation +Origin: https://git.kernel.org/linus/2dbb887e875b1de3ca8f40ddf26bcfe55798c609 + +Implement Kernel IBRS - currently the only known option to mitigate RSB +underflow speculation issues on Skylake hardware. + +Note: since IBRS_ENTER requires fuller context established than +UNTRAIN_RET, it must be placed after it. However, since UNTRAIN_RET +itself implies a RET, it must come after IBRS_ENTER. This means +IBRS_ENTER needs to also move UNTRAIN_RET. + +Note 2: KERNEL_IBRS is sub-optimal for XenPV. + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/entry/calling.h | 58 ++++++++++++++++++++++++++++++ + arch/x86/entry/entry_64.S | 44 ++++++++++++++++++++--- + arch/x86/entry/entry_64_compat.S | 17 ++++++--- + arch/x86/include/asm/cpufeatures.h | 2 +- + 4 files changed, 111 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h +index 29b36e9e4e74..074d04e434de 100644 +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -7,6 +7,8 @@ + #include <asm/asm-offsets.h> + #include <asm/processor-flags.h> + #include <asm/ptrace-abi.h> ++#include <asm/msr.h> ++#include <asm/nospec-branch.h> + + /* + +@@ -282,6 +284,62 @@ For 32-bit we have the following conventions - kernel is built with + + #endif + ++/* ++ * IBRS kernel mitigation for Spectre_v2. 
++ * ++ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers ++ * the regs it uses (AX, CX, DX). Must be called before the first RET ++ * instruction (NOTE! UNTRAIN_RET includes a RET instruction) ++ * ++ * The optional argument is used to save/restore the current value, ++ * which is used on the paranoid paths. ++ * ++ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. ++ */ ++.macro IBRS_ENTER save_reg ++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS ++ movl $MSR_IA32_SPEC_CTRL, %ecx ++ ++.ifnb \save_reg ++ rdmsr ++ shl $32, %rdx ++ or %rdx, %rax ++ mov %rax, \save_reg ++ test $SPEC_CTRL_IBRS, %eax ++ jz .Ldo_wrmsr_\@ ++ lfence ++ jmp .Lend_\@ ++.Ldo_wrmsr_\@: ++.endif ++ ++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx ++ movl %edx, %eax ++ shr $32, %rdx ++ wrmsr ++.Lend_\@: ++.endm ++ ++/* ++ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX) ++ * regs. Must be called after the last RET. ++ */ ++.macro IBRS_EXIT save_reg ++ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS ++ movl $MSR_IA32_SPEC_CTRL, %ecx ++ ++.ifnb \save_reg ++ mov \save_reg, %rdx ++.else ++ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx ++ andl $(~SPEC_CTRL_IBRS), %edx ++.endif ++ ++ movl %edx, %eax ++ shr $32, %rdx ++ wrmsr ++.Lend_\@: ++.endm ++ + /* + * Mitigate Spectre v1 for conditional swapgs code paths. 
+ * +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 1f4b18c8909b..f4384a29086f 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -96,7 +96,6 @@ SYM_CODE_START(entry_SYSCALL_64) + + SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR +- UNTRAIN_RET + + /* Construct struct pt_regs on stack */ + pushq $__USER_DS /* pt_regs->ss */ +@@ -113,6 +112,11 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) + movq %rsp, %rdi + /* Sign extend the lower 32bit as syscall numbers are treated as int */ + movslq %eax, %rsi ++ ++ /* clobbers %rax, make sure it is after saving the syscall nr */ ++ IBRS_ENTER ++ UNTRAIN_RET ++ + call do_syscall_64 /* returns with IRQs disabled */ + + /* +@@ -192,6 +196,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) + * perf profiles. Nothing jumps here. + */ + syscall_return_via_sysret: ++ IBRS_EXIT + POP_REGS pop_rdi=0 + + /* +@@ -605,6 +610,7 @@ SYM_CODE_END(\asmsym) + + SYM_CODE_START_LOCAL(common_interrupt_return) + SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) ++ IBRS_EXIT + #ifdef CONFIG_DEBUG_ENTRY + /* Assert that pt_regs indicates user mode. */ + testb $3, CS(%rsp) +@@ -891,6 +897,9 @@ SYM_CODE_END(xen_failsafe_callback) + * 1 -> no SWAPGS on exit + * + * Y GSBASE value at entry, must be restored in paranoid_exit ++ * ++ * R14 - old CR3 ++ * R15 - old SPEC_CTRL + */ + SYM_CODE_START_LOCAL(paranoid_entry) + UNWIND_HINT_FUNC +@@ -913,7 +922,6 @@ SYM_CODE_START_LOCAL(paranoid_entry) + * be retrieved from a kernel internal table. + */ + SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14 +- UNTRAIN_RET + + /* + * Handling GSBASE depends on the availability of FSGSBASE. +@@ -935,7 +943,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) + * is needed here. 
+ */ + SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx +- RET ++ jmp .Lparanoid_gsbase_done + + .Lparanoid_entry_checkgs: + /* EBX = 1 -> kernel GSBASE active, no restore required */ +@@ -954,8 +962,16 @@ SYM_CODE_START_LOCAL(paranoid_entry) + xorl %ebx, %ebx + swapgs + .Lparanoid_kernel_gsbase: +- + FENCE_SWAPGS_KERNEL_ENTRY ++.Lparanoid_gsbase_done: ++ ++ /* ++ * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like ++ * CR3 above, keep the old value in a callee saved register. ++ */ ++ IBRS_ENTER save_reg=%r15 ++ UNTRAIN_RET ++ + RET + SYM_CODE_END(paranoid_entry) + +@@ -977,9 +993,19 @@ SYM_CODE_END(paranoid_entry) + * 1 -> no SWAPGS on exit + * + * Y User space GSBASE, must be restored unconditionally ++ * ++ * R14 - old CR3 ++ * R15 - old SPEC_CTRL + */ + SYM_CODE_START_LOCAL(paranoid_exit) + UNWIND_HINT_REGS ++ ++ /* ++ * Must restore IBRS state before both CR3 and %GS since we need access ++ * to the per-CPU x86_spec_ctrl_shadow variable. ++ */ ++ IBRS_EXIT save_reg=%r15 ++ + /* + * The order of operations is important. RESTORE_CR3 requires + * kernel GSBASE. +@@ -1023,10 +1049,12 @@ SYM_CODE_START_LOCAL(error_entry) + FENCE_SWAPGS_USER_ENTRY + /* We have user CR3. Change to kernel CR3. */ + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ IBRS_ENTER + UNTRAIN_RET + + leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ + .Lerror_entry_from_usermode_after_swapgs: ++ + /* Put us onto the real thread stack. 
*/ + call sync_regs + RET +@@ -1076,6 +1104,7 @@ SYM_CODE_START_LOCAL(error_entry) + swapgs + FENCE_SWAPGS_USER_ENTRY + SWITCH_TO_KERNEL_CR3 scratch_reg=%rax ++ IBRS_ENTER + UNTRAIN_RET + + /* +@@ -1172,7 +1201,6 @@ SYM_CODE_START(asm_exc_nmi) + movq %rsp, %rdx + movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + UNWIND_HINT_IRET_REGS base=%rdx offset=8 +- UNTRAIN_RET + pushq 5*8(%rdx) /* pt_regs->ss */ + pushq 4*8(%rdx) /* pt_regs->rsp */ + pushq 3*8(%rdx) /* pt_regs->flags */ +@@ -1183,6 +1211,9 @@ SYM_CODE_START(asm_exc_nmi) + PUSH_AND_CLEAR_REGS rdx=(%rdx) + ENCODE_FRAME_POINTER + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + /* + * At this point we no longer need to worry about stack damage + * due to nesting -- we're on the normal thread stack and we're +@@ -1407,6 +1438,9 @@ SYM_CODE_START(asm_exc_nmi) + movq $-1, %rsi + call exc_nmi + ++ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */ ++ IBRS_EXIT save_reg=%r15 ++ + /* Always restore stashed CR3 value (see paranoid_entry) */ + RESTORE_CR3 scratch_reg=%r15 save_reg=%r14 + +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index 03d74c5153fb..bcb89d23ac0e 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -4,7 +4,6 @@ + * + * Copyright 2000-2002 Andi Kleen, SuSE Labs. 
+ */ +-#include "calling.h" + #include <asm/asm-offsets.h> + #include <asm/current.h> + #include <asm/errno.h> +@@ -18,6 +17,8 @@ + #include <linux/linkage.h> + #include <linux/err.h> + ++#include "calling.h" ++ + .section .entry.text, "ax" + + /* +@@ -73,7 +74,6 @@ SYM_CODE_START(entry_SYSENTER_compat) + pushq $__USER32_CS /* pt_regs->cs */ + pushq $0 /* pt_regs->ip = 0 (placeholder) */ + SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) +- UNTRAIN_RET + + /* + * User tracing code (ptrace or signal handlers) might assume that +@@ -90,6 +90,9 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) + + cld + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + /* + * SYSENTER doesn't filter flags, so we need to clear NT and AC + * ourselves. To save a few cycles, we can check whether +@@ -192,7 +195,6 @@ SYM_CODE_START(entry_SYSCALL_compat) + + SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) + ANNOTATE_NOENDBR +- UNTRAIN_RET + + /* Construct struct pt_regs on stack */ + pushq $__USER32_DS /* pt_regs->ss */ +@@ -206,6 +208,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) + PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS + UNWIND_HINT_REGS + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + movq %rsp, %rdi + call do_fast_syscall_32 + /* XEN PV guests always use IRET path */ +@@ -220,6 +225,8 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) + */ + STACKLEAK_ERASE + ++ IBRS_EXIT ++ + movq RBX(%rsp), %rbx /* pt_regs->rbx */ + movq RBP(%rsp), %rbp /* pt_regs->rbp */ + movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */ +@@ -335,12 +342,14 @@ SYM_CODE_START(entry_INT80_compat) + pushq 0*8(%rax) /* regs->orig_ax */ + .Lint80_keep_stack: + +- UNTRAIN_RET + PUSH_AND_CLEAR_REGS rax=$-ENOSYS + UNWIND_HINT_REGS + + cld + ++ IBRS_ENTER ++ UNTRAIN_RET ++ + movq %rsp, %rdi + call do_int80_syscall_32 + jmp swapgs_restore_regs_and_return_to_usermode +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h 
+index c16503ca3b75..b5bb35e8588e 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -203,7 +203,7 @@ + #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ + #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ +-/* FREE! ( 7*32+12) */ ++#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ + /* FREE! ( 7*32+13) */ + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ + #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ diff --git a/debian/patches/bugfix/x86/retbleed/0023-x86-bugs-Optimize-SPEC_CTRL-MSR-writes.patch b/debian/patches/bugfix/x86/retbleed/0023-x86-bugs-Optimize-SPEC_CTRL-MSR-writes.patch new file mode 100644 index 000000000..109e922f1 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0023-x86-bugs-Optimize-SPEC_CTRL-MSR-writes.patch @@ -0,0 +1,108 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:54 +0200 +Subject: x86/bugs: Optimize SPEC_CTRL MSR writes +Origin: https://git.kernel.org/linus/c779bc1a9002fa474175b80e72b85c9bf628abb0 + +When changing SPEC_CTRL for user control, the WRMSR can be delayed +until return-to-user when KERNEL_IBRS has been enabled. + +This avoids an MSR write during context switch. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/nospec-branch.h | 2 +- + arch/x86/kernel/cpu/bugs.c | 18 ++++++++++++------ + arch/x86/kernel/process.c | 2 +- + 3 files changed, 14 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index bac243da5130..b6abf0c6b41d 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -253,7 +253,7 @@ static inline void indirect_branch_prediction_barrier(void) + + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; +-extern void write_spec_ctrl_current(u64 val); ++extern void write_spec_ctrl_current(u64 val, bool force); + + /* + * With retpoline, we must use IBRS to restrict branch prediction +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index ce7435593c3e..8f7c8dfbdbb4 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -63,13 +63,19 @@ static DEFINE_MUTEX(spec_ctrl_mutex); + * Keep track of the SPEC_CTRL MSR value for the current task, which may differ + * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). + */ +-void write_spec_ctrl_current(u64 val) ++void write_spec_ctrl_current(u64 val, bool force) + { + if (this_cpu_read(x86_spec_ctrl_current) == val) + return; + + this_cpu_write(x86_spec_ctrl_current, val); +- wrmsrl(MSR_IA32_SPEC_CTRL, val); ++ ++ /* ++ * When KERNEL_IBRS this MSR is written on return-to-user, unless ++ * forced the update can be delayed until that time. 
++ */ ++ if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) ++ wrmsrl(MSR_IA32_SPEC_CTRL, val); + } + + /* +@@ -1297,7 +1303,7 @@ static void __init spectre_v2_select_mitigation(void) + if (spectre_v2_in_eibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; +- write_spec_ctrl_current(x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + } + + switch (mode) { +@@ -1352,7 +1358,7 @@ static void __init spectre_v2_select_mitigation(void) + + static void update_stibp_msr(void * __unused) + { +- write_spec_ctrl_current(x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + } + + /* Update x86_spec_ctrl_base in case SMT state changed. */ +@@ -1595,7 +1601,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) + x86_amd_ssb_disable(); + } else { + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; +- write_spec_ctrl_current(x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + } + } + +@@ -1846,7 +1852,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) + void x86_spec_ctrl_setup_ap(void) + { + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) +- write_spec_ctrl_current(x86_spec_ctrl_base); ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_ssb_disable(); +diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c +index 05611f66d013..d456ce21c255 100644 +--- a/arch/x86/kernel/process.c ++++ b/arch/x86/kernel/process.c +@@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, + } + + if (updmsr) +- write_spec_ctrl_current(msr); ++ write_spec_ctrl_current(msr, false); + } + + static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/debian/patches/bugfix/x86/retbleed/0024-x86-speculation-Add-spectre_v2-ibrs-option-to-suppor.patch 
b/debian/patches/bugfix/x86/retbleed/0024-x86-speculation-Add-spectre_v2-ibrs-option-to-suppor.patch new file mode 100644 index 000000000..cbcc64169 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0024-x86-speculation-Add-spectre_v2-ibrs-option-to-suppor.patch @@ -0,0 +1,208 @@ +From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> +Date: Tue, 14 Jun 2022 23:15:55 +0200 +Subject: x86/speculation: Add spectre_v2=ibrs option to support Kernel IBRS +Origin: https://git.kernel.org/linus/7c693f54c873691a4b7da05c7e0f74e67745d144 + +Extend spectre_v2= boot option with Kernel IBRS. + + [jpoimboe: no STIBP with IBRS] + +Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + .../admin-guide/kernel-parameters.txt | 1 + + arch/x86/include/asm/nospec-branch.h | 1 + + arch/x86/kernel/cpu/bugs.c | 66 +++++++++++++++---- + 3 files changed, 54 insertions(+), 14 deletions(-) + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 168e164f09eb..0aeea69e9b7a 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5589,6 +5589,7 @@ + eibrs - enhanced IBRS + eibrs,retpoline - enhanced IBRS + Retpolines + eibrs,lfence - enhanced IBRS + LFENCE ++ ibrs - use IBRS to protect kernel + + Not specifying this option is equivalent to + spectre_v2=auto. 
+diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index b6abf0c6b41d..e14046daa7ba 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -211,6 +211,7 @@ enum spectre_v2_mitigation { + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, ++ SPECTRE_V2_IBRS, + }; + + /* The indirect branch speculation control variants */ +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 8f7c8dfbdbb4..2d7896493220 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -972,6 +972,7 @@ enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_EIBRS, + SPECTRE_V2_CMD_EIBRS_RETPOLINE, + SPECTRE_V2_CMD_EIBRS_LFENCE, ++ SPECTRE_V2_CMD_IBRS, + }; + + enum spectre_v2_user_cmd { +@@ -1044,11 +1045,12 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) + return SPECTRE_V2_USER_CMD_AUTO; + } + +-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) ++static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) + { +- return (mode == SPECTRE_V2_EIBRS || +- mode == SPECTRE_V2_EIBRS_RETPOLINE || +- mode == SPECTRE_V2_EIBRS_LFENCE); ++ return mode == SPECTRE_V2_IBRS || ++ mode == SPECTRE_V2_EIBRS || ++ mode == SPECTRE_V2_EIBRS_RETPOLINE || ++ mode == SPECTRE_V2_EIBRS_LFENCE; + } + + static void __init +@@ -1113,12 +1115,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) + } + + /* +- * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not +- * required. ++ * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible, ++ * STIBP is not required. 
+ */ + if (!boot_cpu_has(X86_FEATURE_STIBP) || + !smt_possible || +- spectre_v2_in_eibrs_mode(spectre_v2_enabled)) ++ spectre_v2_in_ibrs_mode(spectre_v2_enabled)) + return; + + /* +@@ -1150,6 +1152,7 @@ static const char * const spectre_v2_strings[] = { + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", ++ [SPECTRE_V2_IBRS] = "Mitigation: IBRS", + }; + + static const struct { +@@ -1167,6 +1170,7 @@ static const struct { + { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, + { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, ++ { "ibrs", SPECTRE_V2_CMD_IBRS, false }, + }; + + static void __init spec_v2_print_cond(const char *reason, bool secure) +@@ -1229,6 +1233,24 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) + return SPECTRE_V2_CMD_AUTO; + } + ++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { ++ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ ++ if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) { ++ pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ ++ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) { ++ pr_err("%s selected but running as XenPV guest. 
Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ + spec_v2_print_cond(mitigation_options[i].option, + mitigation_options[i].secure); + return cmd; +@@ -1268,6 +1290,14 @@ static void __init spectre_v2_select_mitigation(void) + break; + } + ++ if (boot_cpu_has_bug(X86_BUG_RETBLEED) && ++ retbleed_cmd != RETBLEED_CMD_OFF && ++ boot_cpu_has(X86_FEATURE_IBRS) && ++ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { ++ mode = SPECTRE_V2_IBRS; ++ break; ++ } ++ + mode = spectre_v2_select_retpoline(); + break; + +@@ -1284,6 +1314,10 @@ static void __init spectre_v2_select_mitigation(void) + mode = spectre_v2_select_retpoline(); + break; + ++ case SPECTRE_V2_CMD_IBRS: ++ mode = SPECTRE_V2_IBRS; ++ break; ++ + case SPECTRE_V2_CMD_EIBRS: + mode = SPECTRE_V2_EIBRS; + break; +@@ -1300,7 +1334,7 @@ static void __init spectre_v2_select_mitigation(void) + if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + +- if (spectre_v2_in_eibrs_mode(mode)) { ++ if (spectre_v2_in_ibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + write_spec_ctrl_current(x86_spec_ctrl_base, true); +@@ -1311,6 +1345,10 @@ static void __init spectre_v2_select_mitigation(void) + case SPECTRE_V2_EIBRS: + break; + ++ case SPECTRE_V2_IBRS: ++ setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS); ++ break; ++ + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_EIBRS_LFENCE: + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); +@@ -1337,17 +1375,17 @@ static void __init spectre_v2_select_mitigation(void) + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + + /* +- * Retpoline means the kernel is safe because it has no indirect +- * branches. Enhanced IBRS protects firmware too, so, enable restricted +- * speculation around firmware calls only when Enhanced IBRS isn't +- * supported. ++ * Retpoline protects the kernel, but doesn't protect firmware. 
IBRS ++ * and Enhanced IBRS protect firmware too, so enable IBRS around ++ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise ++ * enabled. + * + * Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because + * the user might select retpoline on the kernel command line and if + * the CPU supports Enhanced IBRS, kernel might un-intentionally not + * enable IBRS around firmware calls. + */ +- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { ++ if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); + pr_info("Enabling Restricted Speculation for firmware calls\n"); + } +@@ -2089,7 +2127,7 @@ static ssize_t mmio_stale_data_show_state(char *buf) + + static char *stibp_state(void) + { +- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) ++ if (spectre_v2_in_ibrs_mode(spectre_v2_enabled)) + return ""; + + switch (spectre_v2_user_stibp) { diff --git a/debian/patches/bugfix/x86/retbleed/0025-x86-bugs-Split-spectre_v2_select_mitigation-and-spec.patch b/debian/patches/bugfix/x86/retbleed/0025-x86-bugs-Split-spectre_v2_select_mitigation-and-spec.patch new file mode 100644 index 000000000..891d03913 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0025-x86-bugs-Split-spectre_v2_select_mitigation-and-spec.patch @@ -0,0 +1,99 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:56 +0200 +Subject: x86/bugs: Split spectre_v2_select_mitigation() and + spectre_v2_user_select_mitigation() +Origin: https://git.kernel.org/linus/166115c08a9b0b846b783088808a27d739be6e8d + +retbleed will depend on spectre_v2, while spectre_v2_user depends on +retbleed. Break this cycle. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kernel/cpu/bugs.c | 25 +++++++++++++++++-------- + 1 file changed, 17 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 2d7896493220..c546a9e4ed17 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -37,8 +37,9 @@ + #include "cpu.h" + + static void __init spectre_v1_select_mitigation(void); +-static void __init retbleed_select_mitigation(void); + static void __init spectre_v2_select_mitigation(void); ++static void __init retbleed_select_mitigation(void); ++static void __init spectre_v2_user_select_mitigation(void); + static void __init ssb_select_mitigation(void); + static void __init l1tf_select_mitigation(void); + static void __init mds_select_mitigation(void); +@@ -145,13 +146,19 @@ void __init check_bugs(void) + + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); ++ spectre_v2_select_mitigation(); ++ /* ++ * retbleed_select_mitigation() relies on the state set by ++ * spectre_v2_select_mitigation(); specifically it wants to know about ++ * spectre_v2=ibrs. ++ */ + retbleed_select_mitigation(); + /* +- * spectre_v2_select_mitigation() relies on the state set by ++ * spectre_v2_user_select_mitigation() relies on the state set by + * retbleed_select_mitigation(); specifically the STIBP selection is + * forced for UNRET. 
+ */ +- spectre_v2_select_mitigation(); ++ spectre_v2_user_select_mitigation(); + ssb_select_mitigation(); + l1tf_select_mitigation(); + md_clear_select_mitigation(); +@@ -1013,13 +1020,15 @@ static void __init spec_v2_user_print_cond(const char *reason, bool secure) + pr_info("spectre_v2_user=%s forced on command line.\n", reason); + } + ++static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd; ++ + static enum spectre_v2_user_cmd __init +-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) ++spectre_v2_parse_user_cmdline(void) + { + char arg[20]; + int ret, i; + +- switch (v2_cmd) { ++ switch (spectre_v2_cmd) { + case SPECTRE_V2_CMD_NONE: + return SPECTRE_V2_USER_CMD_NONE; + case SPECTRE_V2_CMD_FORCE: +@@ -1054,7 +1063,7 @@ static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) + } + + static void __init +-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) ++spectre_v2_user_select_mitigation(void) + { + enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; + bool smt_possible = IS_ENABLED(CONFIG_SMP); +@@ -1067,7 +1076,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) + cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + smt_possible = false; + +- cmd = spectre_v2_parse_user_cmdline(v2_cmd); ++ cmd = spectre_v2_parse_user_cmdline(); + switch (cmd) { + case SPECTRE_V2_USER_CMD_NONE: + goto set_mode; +@@ -1391,7 +1400,7 @@ static void __init spectre_v2_select_mitigation(void) + } + + /* Set up IBPB and STIBP depending on the general spectre V2 command */ +- spectre_v2_user_select_mitigation(cmd); ++ spectre_v2_cmd = cmd; + } + + static void update_stibp_msr(void * __unused) diff --git a/debian/patches/bugfix/x86/retbleed/0026-x86-bugs-Report-Intel-retbleed-vulnerability.patch b/debian/patches/bugfix/x86/retbleed/0026-x86-bugs-Report-Intel-retbleed-vulnerability.patch new file mode 100644 index 000000000..d5f12f8d8 --- /dev/null +++ 
b/debian/patches/bugfix/x86/retbleed/0026-x86-bugs-Report-Intel-retbleed-vulnerability.patch @@ -0,0 +1,174 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Fri, 24 Jun 2022 13:48:58 +0200 +Subject: x86/bugs: Report Intel retbleed vulnerability +Origin: https://git.kernel.org/linus/6ad0ad2bf8a67e27d1f9d006a1dabb0e1c360cc3 + +Skylake suffers from RSB underflow speculation issues; report this +vulnerability and it's mitigation (spectre_v2=ibrs). + + [jpoimboe: cleanups, eibrs] + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/msr-index.h | 1 + + arch/x86/kernel/cpu/bugs.c | 39 +++++++++++++++++++++++++++----- + arch/x86/kernel/cpu/common.c | 24 ++++++++++---------- + 3 files changed, 46 insertions(+), 18 deletions(-) + +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index d27e0581b777..059c7e500ab4 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -93,6 +93,7 @@ + #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a + #define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */ + #define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */ ++#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */ + #define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */ + #define ARCH_CAP_SSB_NO BIT(4) /* + * Not susceptible to Speculative Store Bypass +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index c546a9e4ed17..05f29db9473b 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -790,12 +790,17 @@ static int __init nospectre_v1_cmdline(char *str) + } + early_param("nospectre_v1", nospectre_v1_cmdline); + ++static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = ++ SPECTRE_V2_NONE; ++ + #undef pr_fmt + #define 
pr_fmt(fmt) "RETBleed: " fmt + + enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_UNRET, ++ RETBLEED_MITIGATION_IBRS, ++ RETBLEED_MITIGATION_EIBRS, + }; + + enum retbleed_mitigation_cmd { +@@ -807,6 +812,8 @@ enum retbleed_mitigation_cmd { + const char * const retbleed_strings[] = { + [RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", ++ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", ++ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", + }; + + static enum retbleed_mitigation retbleed_mitigation __ro_after_init = +@@ -849,6 +856,7 @@ early_param("retbleed", retbleed_parse_cmdline); + + #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" + #define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" ++#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + + static void __init retbleed_select_mitigation(void) + { +@@ -865,12 +873,15 @@ static void __init retbleed_select_mitigation(void) + + case RETBLEED_CMD_AUTO: + default: +- if (!boot_cpu_has_bug(X86_BUG_RETBLEED)) +- break; +- + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ ++ /* ++ * The Intel mitigation (IBRS) was already selected in ++ * spectre_v2_select_mitigation(). ++ */ ++ + break; + } + +@@ -900,15 +911,31 @@ static void __init retbleed_select_mitigation(void) + break; + } + ++ /* ++ * Let IBRS trump all on Intel without affecting the effects of the ++ * retbleed= cmdline option. 
++ */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { ++ switch (spectre_v2_enabled) { ++ case SPECTRE_V2_IBRS: ++ retbleed_mitigation = RETBLEED_MITIGATION_IBRS; ++ break; ++ case SPECTRE_V2_EIBRS: ++ case SPECTRE_V2_EIBRS_RETPOLINE: ++ case SPECTRE_V2_EIBRS_LFENCE: ++ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; ++ break; ++ default: ++ pr_err(RETBLEED_INTEL_MSG); ++ } ++ } ++ + pr_info("%s\n", retbleed_strings[retbleed_mitigation]); + } + + #undef pr_fmt + #define pr_fmt(fmt) "Spectre V2 : " fmt + +-static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = +- SPECTRE_V2_NONE; +- + static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init = + SPECTRE_V2_USER_NONE; + static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init = +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 4089c173c6ae..75a5c720c852 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1237,24 +1237,24 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | +- BIT(7) | BIT(0xB), MMIO), +- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO), ++ BIT(7) | BIT(0xB), MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | 
MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), +- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO), +- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), +- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), +@@ -1364,7 +1364,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + +- if (cpu_matches(cpu_vuln_blacklist, RETBLEED)) ++ if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))) + 
setup_force_cpu_bug(X86_BUG_RETBLEED); + + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) diff --git a/debian/patches/bugfix/x86/retbleed/0027-intel_idle-Disable-IBRS-during-long-idle.patch b/debian/patches/bugfix/x86/retbleed/0027-intel_idle-Disable-IBRS-during-long-idle.patch new file mode 100644 index 000000000..d03bd3e98 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0027-intel_idle-Disable-IBRS-during-long-idle.patch @@ -0,0 +1,182 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:58 +0200 +Subject: intel_idle: Disable IBRS during long idle +Origin: https://git.kernel.org/linus/bf5835bcdb9635c97f85120dba9bfa21e111130f + +Having IBRS enabled while the SMT sibling is idle unnecessarily slows +down the running sibling. OTOH, disabling IBRS around idle takes two +MSR writes, which will increase the idle latency. + +Therefore, only disable IBRS around deeper idle states. Shallow idle +states are bounded by the tick in duration, since NOHZ is not allowed +for them by virtue of their short target residency. + +Only do this for mwait-driven idle, since that keeps interrupts disabled +across idle, which makes disabling IBRS vs IRQ-entry a non-issue. + +Note: C6 is a random threshold, most importantly C1 probably shouldn't +disable IBRS, benchmarking needed. 
+ +Suggested-by: Tim Chen <tim.c.chen@linux.intel.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/nospec-branch.h | 1 + + arch/x86/kernel/cpu/bugs.c | 6 ++++ + drivers/idle/intel_idle.c | 44 ++++++++++++++++++++++++---- + 3 files changed, 45 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index e14046daa7ba..ce1acb557162 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -255,6 +255,7 @@ static inline void indirect_branch_prediction_barrier(void) + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; + extern void write_spec_ctrl_current(u64 val, bool force); ++extern u64 spec_ctrl_current(void); + + /* + * With retpoline, we must use IBRS to restrict branch prediction +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 05f29db9473b..00e9c769ce0b 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -79,6 +79,12 @@ void write_spec_ctrl_current(u64 val, bool force) + wrmsrl(MSR_IA32_SPEC_CTRL, val); + } + ++u64 spec_ctrl_current(void) ++{ ++ return this_cpu_read(x86_spec_ctrl_current); ++} ++EXPORT_SYMBOL_GPL(spec_ctrl_current); ++ + /* + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. 
+diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index 424ef470223d..f5c6802aa6c3 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -47,11 +47,13 @@ + #include <linux/tick.h> + #include <trace/events/power.h> + #include <linux/sched.h> ++#include <linux/sched/smt.h> + #include <linux/notifier.h> + #include <linux/cpu.h> + #include <linux/moduleparam.h> + #include <asm/cpu_device_id.h> + #include <asm/intel-family.h> ++#include <asm/nospec-branch.h> + #include <asm/mwait.h> + #include <asm/msr.h> + +@@ -105,6 +107,12 @@ static unsigned int mwait_substates __initdata; + */ + #define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15) + ++/* ++ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE ++ * above. ++ */ ++#define CPUIDLE_FLAG_IBRS BIT(16) ++ + /* + * MWAIT takes an 8-bit "hint" in EAX "suggesting" + * the C-state (top nibble) and sub-state (bottom nibble) +@@ -159,6 +167,24 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, + return ret; + } + ++static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev, ++ struct cpuidle_driver *drv, int index) ++{ ++ bool smt_active = sched_smt_active(); ++ u64 spec_ctrl = spec_ctrl_current(); ++ int ret; ++ ++ if (smt_active) ++ wrmsrl(MSR_IA32_SPEC_CTRL, 0); ++ ++ ret = __intel_idle(dev, drv, index); ++ ++ if (smt_active) ++ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl); ++ ++ return ret; ++} ++ + /** + * intel_idle_s2idle - Ask the processor to enter the given idle state. + * @dev: cpuidle device of the target CPU. 
+@@ -680,7 +706,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + { + .name = "C6", + .desc = "MWAIT 0x20", +- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 85, + .target_residency = 200, + .enter = &intel_idle, +@@ -688,7 +714,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + { + .name = "C7s", + .desc = "MWAIT 0x33", +- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 124, + .target_residency = 800, + .enter = &intel_idle, +@@ -696,7 +722,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + { + .name = "C8", + .desc = "MWAIT 0x40", +- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 200, + .target_residency = 800, + .enter = &intel_idle, +@@ -704,7 +730,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + { + .name = "C9", + .desc = "MWAIT 0x50", +- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 480, + .target_residency = 5000, + .enter = &intel_idle, +@@ -712,7 +738,7 @@ static struct cpuidle_state skl_cstates[] __initdata = { + { + .name = "C10", + .desc = "MWAIT 0x60", +- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 890, + .target_residency = 5000, + .enter = &intel_idle, +@@ -741,7 +767,7 @@ static struct cpuidle_state skx_cstates[] __initdata = { + { + .name = "C6", + .desc = "MWAIT 0x20", +- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED, ++ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS, + .exit_latency = 133, + .target_residency = 600, + .enter = &intel_idle, +@@ -1819,6 +1845,12 @@ 
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv) + if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE) + drv->states[drv->state_count].enter = intel_idle_irq; + ++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) && ++ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) { ++ WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE); ++ drv->states[drv->state_count].enter = intel_idle_ibrs; ++ } ++ + if ((disabled_states_mask & BIT(drv->state_count)) || + ((icpu->use_acpi || force_use_acpi) && + intel_idle_off_by_default(mwait_hint) && diff --git a/debian/patches/bugfix/x86/retbleed/0028-objtool-Update-Retpoline-validation.patch b/debian/patches/bugfix/x86/retbleed/0028-objtool-Update-Retpoline-validation.patch new file mode 100644 index 000000000..b2d6395b9 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0028-objtool-Update-Retpoline-validation.patch @@ -0,0 +1,112 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:15:59 +0200 +Subject: objtool: Update Retpoline validation +Origin: https://git.kernel.org/linus/9bb2ec608a209018080ca262f771e6a9ff203b6f + +Update retpoline validation with the new CONFIG_RETPOLINE requirement of +not having bare naked RET instructions. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/nospec-branch.h | 6 ++++++ + arch/x86/mm/mem_encrypt_boot.S | 2 ++ + arch/x86/xen/xen-head.S | 1 + + tools/objtool/check.c | 19 +++++++++++++------ + 4 files changed, 22 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index ce1acb557162..455d79c6c2f3 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -75,6 +75,12 @@ + .popsection + .endm + ++/* ++ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions ++ * vs RETBleed validation. ++ */ ++#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE ++ + /* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 +diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S +index d94dea450fa6..9de3d900bc92 100644 +--- a/arch/x86/mm/mem_encrypt_boot.S ++++ b/arch/x86/mm/mem_encrypt_boot.S +@@ -66,6 +66,7 @@ SYM_FUNC_START(sme_encrypt_execute) + pop %rbp + + /* Offset to __x86_return_thunk would be wrong here */ ++ ANNOTATE_UNRET_SAFE + ret + int3 + SYM_FUNC_END(sme_encrypt_execute) +@@ -154,6 +155,7 @@ SYM_FUNC_START(__enc_copy) + pop %r15 + + /* Offset to __x86_return_thunk would be wrong here */ ++ ANNOTATE_UNRET_SAFE + ret + int3 + .L__enc_copy_end: +diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S +index 3a2cd93bf059..fa884fc73e07 100644 +--- a/arch/x86/xen/xen-head.S ++++ b/arch/x86/xen/xen-head.S +@@ -26,6 +26,7 @@ SYM_CODE_START(hypercall_page) + .rept (PAGE_SIZE / 32) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR ++ ANNOTATE_UNRET_SAFE + ret + /* + * Xen will write the hypercall page, and sort out ENDBR. 
+diff --git a/tools/objtool/check.c b/tools/objtool/check.c +index 4252cd05dfc4..7dc378156a63 100644 +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -2115,8 +2115,9 @@ static int read_retpoline_hints(struct objtool_file *file) + } + + if (insn->type != INSN_JUMP_DYNAMIC && +- insn->type != INSN_CALL_DYNAMIC) { +- WARN_FUNC("retpoline_safe hint not an indirect jump/call", ++ insn->type != INSN_CALL_DYNAMIC && ++ insn->type != INSN_RETURN) { ++ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret", + insn->sec, insn->offset); + return -1; + } +@@ -3526,7 +3527,8 @@ static int validate_retpoline(struct objtool_file *file) + + for_each_insn(file, insn) { + if (insn->type != INSN_JUMP_DYNAMIC && +- insn->type != INSN_CALL_DYNAMIC) ++ insn->type != INSN_CALL_DYNAMIC && ++ insn->type != INSN_RETURN) + continue; + + if (insn->retpoline_safe) +@@ -3541,9 +3543,14 @@ static int validate_retpoline(struct objtool_file *file) + if (!strcmp(insn->sec->name, ".init.text") && !opts.module) + continue; + +- WARN_FUNC("indirect %s found in RETPOLINE build", +- insn->sec, insn->offset, +- insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call"); ++ if (insn->type == INSN_RETURN) { ++ WARN_FUNC("'naked' return found in RETPOLINE build", ++ insn->sec, insn->offset); ++ } else { ++ WARN_FUNC("indirect %s found in RETPOLINE build", ++ insn->sec, insn->offset, ++ insn->type == INSN_JUMP_DYNAMIC ? 
"jump" : "call"); ++ } + + warnings++; + } diff --git a/debian/patches/bugfix/x86/retbleed/0029-x86-xen-Rename-SYS-entry-points.patch b/debian/patches/bugfix/x86/retbleed/0029-x86-xen-Rename-SYS-entry-points.patch new file mode 100644 index 000000000..fb41f5ad3 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0029-x86-xen-Rename-SYS-entry-points.patch @@ -0,0 +1,134 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:16:00 +0200 +Subject: x86/xen: Rename SYS* entry points +Origin: https://git.kernel.org/linus/b75b7f8ef1148be1b9321ffc2f6c19238904b438 + +Native SYS{CALL,ENTER} entry points are called +entry_SYS{CALL,ENTER}_{64,compat}, make sure the Xen versions are +named consistently. + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/xen/setup.c | 6 +++--- + arch/x86/xen/xen-asm.S | 20 ++++++++++---------- + arch/x86/xen/xen-ops.h | 6 +++--- + 3 files changed, 16 insertions(+), 16 deletions(-) + +diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c +index 81aa46f770c5..cfa99e8f054b 100644 +--- a/arch/x86/xen/setup.c ++++ b/arch/x86/xen/setup.c +@@ -918,7 +918,7 @@ void xen_enable_sysenter(void) + if (!boot_cpu_has(sysenter_feature)) + return; + +- ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); ++ ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat); + if(ret != 0) + setup_clear_cpu_cap(sysenter_feature); + } +@@ -927,7 +927,7 @@ void xen_enable_syscall(void) + { + int ret; + +- ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); ++ ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64); + if (ret != 0) { + printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); + /* Pretty fatal; 64-bit userspace has no other +@@ -936,7 +936,7 @@ void xen_enable_syscall(void) + + if 
(boot_cpu_has(X86_FEATURE_SYSCALL32)) { + ret = register_callback(CALLBACKTYPE_syscall32, +- xen_syscall32_target); ++ xen_entry_SYSCALL_compat); + if (ret != 0) + setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); + } +diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S +index caa9bc2fa100..6bf9d45b9178 100644 +--- a/arch/x86/xen/xen-asm.S ++++ b/arch/x86/xen/xen-asm.S +@@ -234,7 +234,7 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) + */ + + /* Normal 64-bit system call target */ +-SYM_CODE_START(xen_syscall_target) ++SYM_CODE_START(xen_entry_SYSCALL_64) + UNWIND_HINT_EMPTY + ENDBR + popq %rcx +@@ -249,12 +249,12 @@ SYM_CODE_START(xen_syscall_target) + movq $__USER_CS, 1*8(%rsp) + + jmp entry_SYSCALL_64_after_hwframe +-SYM_CODE_END(xen_syscall_target) ++SYM_CODE_END(xen_entry_SYSCALL_64) + + #ifdef CONFIG_IA32_EMULATION + + /* 32-bit compat syscall target */ +-SYM_CODE_START(xen_syscall32_target) ++SYM_CODE_START(xen_entry_SYSCALL_compat) + UNWIND_HINT_EMPTY + ENDBR + popq %rcx +@@ -269,10 +269,10 @@ SYM_CODE_START(xen_syscall32_target) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSCALL_compat_after_hwframe +-SYM_CODE_END(xen_syscall32_target) ++SYM_CODE_END(xen_entry_SYSCALL_compat) + + /* 32-bit compat sysenter target */ +-SYM_CODE_START(xen_sysenter_target) ++SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_EMPTY + ENDBR + /* +@@ -291,19 +291,19 @@ SYM_CODE_START(xen_sysenter_target) + movq $__USER32_CS, 1*8(%rsp) + + jmp entry_SYSENTER_compat_after_hwframe +-SYM_CODE_END(xen_sysenter_target) ++SYM_CODE_END(xen_entry_SYSENTER_compat) + + #else /* !CONFIG_IA32_EMULATION */ + +-SYM_CODE_START(xen_syscall32_target) +-SYM_CODE_START(xen_sysenter_target) ++SYM_CODE_START(xen_entry_SYSCALL_compat) ++SYM_CODE_START(xen_entry_SYSENTER_compat) + UNWIND_HINT_EMPTY + ENDBR + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ + mov $-ENOSYS, %rax + pushq $0 + jmp hypercall_iret +-SYM_CODE_END(xen_sysenter_target) +-SYM_CODE_END(xen_syscall32_target) 
++SYM_CODE_END(xen_entry_SYSENTER_compat) ++SYM_CODE_END(xen_entry_SYSCALL_compat) + + #endif /* CONFIG_IA32_EMULATION */ +diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h +index fd0fec6e92f4..9a8bb972193d 100644 +--- a/arch/x86/xen/xen-ops.h ++++ b/arch/x86/xen/xen-ops.h +@@ -10,10 +10,10 @@ + /* These are code, but not functions. Defined in entry.S */ + extern const char xen_failsafe_callback[]; + +-void xen_sysenter_target(void); ++void xen_entry_SYSENTER_compat(void); + #ifdef CONFIG_X86_64 +-void xen_syscall_target(void); +-void xen_syscall32_target(void); ++void xen_entry_SYSCALL_64(void); ++void xen_entry_SYSCALL_compat(void); + #endif + + extern void *xen_initial_gdt; diff --git a/debian/patches/bugfix/x86/retbleed/0030-x86-xen-Add-UNTRAIN_RET.patch b/debian/patches/bugfix/x86/retbleed/0030-x86-xen-Add-UNTRAIN_RET.patch new file mode 100644 index 000000000..744faa4d8 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0030-x86-xen-Add-UNTRAIN_RET.patch @@ -0,0 +1,41 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:16:01 +0200 +Subject: x86/xen: Add UNTRAIN_RET +Origin: https://git.kernel.org/linus/d147553b64bad34d2f92cb7d8ba454ae95c3baac + +Ensure the Xen entry also passes through UNTRAIN_RET. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/entry/entry_64.S | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index f4384a29086f..0c88802e1155 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -328,6 +328,12 @@ SYM_CODE_END(ret_from_fork) + #endif + .endm + ++SYM_CODE_START_LOCAL(xen_error_entry) ++ UNWIND_HINT_FUNC ++ UNTRAIN_RET ++ RET ++SYM_CODE_END(xen_error_entry) ++ + /** + * idtentry_body - Macro to emit code calling the C function + * @cfunc: C function to be called +@@ -347,7 +353,7 @@ SYM_CODE_END(ret_from_fork) + * switch the CR3. So it can skip invoking error_entry(). + */ + ALTERNATIVE "call error_entry; movq %rax, %rsp", \ +- "", X86_FEATURE_XENPV ++ "call xen_error_entry", X86_FEATURE_XENPV + + ENCODE_FRAME_POINTER + UNWIND_HINT_REGS diff --git a/debian/patches/bugfix/x86/retbleed/0031-x86-bugs-Add-retbleed-ibpb.patch b/debian/patches/bugfix/x86/retbleed/0031-x86-bugs-Add-retbleed-ibpb.patch new file mode 100644 index 000000000..11ccfcf76 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0031-x86-bugs-Add-retbleed-ibpb.patch @@ -0,0 +1,260 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:16:02 +0200 +Subject: x86/bugs: Add retbleed=ibpb +Origin: https://git.kernel.org/linus/3ebc170068885b6fc7bedda6c667bb2c4d533159 + +jmp2ret mitigates the easy-to-attack case at relatively low overhead. +It mitigates the long speculation windows after a mispredicted RET, but +it does not mitigate the short speculation window from arbitrary +instruction boundaries. + +On Zen2, there is a chicken bit which needs setting, which mitigates +"arbitrary instruction boundaries" down to just "basic block boundaries". 
+ +But there is no fix for the short speculation window on basic block +boundaries, other than to flush the entire BTB to evict all attacker +predictions. + +On the spectrum of "fast & blurry" -> "safe", there is (on top of STIBP +or no-SMT): + + 1) Nothing System wide open + 2) jmp2ret May stop a script kiddy + 3) jmp2ret+chickenbit Raises the bar rather further + 4) IBPB Only thing which can count as "safe". + +Tentative numbers put IBPB-on-entry at a 2.5x hit on Zen2, and a 10x hit +on Zen1 according to lmbench. + + [ bp: Fixup feature bit comments, document option, 32-bit build fix. ] + +Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + .../admin-guide/kernel-parameters.txt | 3 ++ + arch/x86/entry/Makefile | 2 +- + arch/x86/entry/entry.S | 22 ++++++++++ + arch/x86/include/asm/cpufeatures.h | 2 +- + arch/x86/include/asm/nospec-branch.h | 8 +++- + arch/x86/kernel/cpu/bugs.c | 43 +++++++++++++++---- + 6 files changed, 67 insertions(+), 13 deletions(-) + create mode 100644 arch/x86/entry/entry.S + +diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt +index 0aeea69e9b7a..f2d26cb7e853 100644 +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -5207,6 +5207,9 @@ + disabling SMT if necessary for + the full mitigation (only on Zen1 + and older without STIBP). ++ ibpb - mitigate short speculation windows on ++ basic block boundaries too. Safe, highest ++ perf impact. + unret - force enable untrained return thunks, + only effective on AMD f15h-f17h + based systems. 
+diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile +index 7fec5dcf6438..eeadbd7d92cc 100644 +--- a/arch/x86/entry/Makefile ++++ b/arch/x86/entry/Makefile +@@ -11,7 +11,7 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) + + CFLAGS_common.o += -fno-stack-protector + +-obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o ++obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o + obj-y += common.o + + obj-y += vdso/ +diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S +new file mode 100644 +index 000000000000..bfb7bcb362bc +--- /dev/null ++++ b/arch/x86/entry/entry.S +@@ -0,0 +1,22 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Common place for both 32- and 64-bit entry routines. ++ */ ++ ++#include <linux/linkage.h> ++#include <asm/export.h> ++#include <asm/msr-index.h> ++ ++.pushsection .noinstr.text, "ax" ++ ++SYM_FUNC_START(entry_ibpb) ++ movl $MSR_IA32_PRED_CMD, %ecx ++ movl $PRED_CMD_IBPB, %eax ++ xorl %edx, %edx ++ wrmsr ++ RET ++SYM_FUNC_END(entry_ibpb) ++/* For KVM */ ++EXPORT_SYMBOL_GPL(entry_ibpb); ++ ++.popsection +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index b5bb35e8588e..09dce77f4848 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -296,7 +296,7 @@ + #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ + #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ + #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ +-/* FREE! (11*32+10) */ ++#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ + /* FREE! 
(11*32+11) */ + #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 455d79c6c2f3..05dd75478d7b 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -123,14 +123,17 @@ + * return thunk isn't mapped into the userspace tables (then again, AMD + * typically has NO_MELTDOWN). + * +- * Doesn't clobber any registers but does require a stable stack. ++ * While zen_untrain_ret() doesn't clobber anything but requires stack, ++ * entry_ibpb() will clobber AX, CX, DX. + * + * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point + * where we have a stack but before any RET instruction. + */ + .macro UNTRAIN_RET + #ifdef CONFIG_RETPOLINE +- ALTERNATIVE "", "call zen_untrain_ret", X86_FEATURE_UNRET ++ ALTERNATIVE_2 "", \ ++ "call zen_untrain_ret", X86_FEATURE_UNRET, \ ++ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + #endif + .endm + +@@ -147,6 +150,7 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[]; + + extern void __x86_return_thunk(void); + extern void zen_untrain_ret(void); ++extern void entry_ibpb(void); + + #ifdef CONFIG_RETPOLINE + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 00e9c769ce0b..04077d13b3ae 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -805,6 +805,7 @@ static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + enum retbleed_mitigation { + RETBLEED_MITIGATION_NONE, + RETBLEED_MITIGATION_UNRET, ++ RETBLEED_MITIGATION_IBPB, + RETBLEED_MITIGATION_IBRS, + RETBLEED_MITIGATION_EIBRS, + }; +@@ -813,11 +814,13 @@ enum retbleed_mitigation_cmd { + RETBLEED_CMD_OFF, + RETBLEED_CMD_AUTO, + RETBLEED_CMD_UNRET, ++ RETBLEED_CMD_IBPB, + }; + + const char * const retbleed_strings[] = { + 
[RETBLEED_MITIGATION_NONE] = "Vulnerable", + [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk", ++ [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", + [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", + [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", + }; +@@ -847,6 +850,8 @@ static int __init retbleed_parse_cmdline(char *str) + retbleed_cmd = RETBLEED_CMD_AUTO; + } else if (!strcmp(str, "unret")) { + retbleed_cmd = RETBLEED_CMD_UNRET; ++ } else if (!strcmp(str, "ibpb")) { ++ retbleed_cmd = RETBLEED_CMD_IBPB; + } else if (!strcmp(str, "nosmt")) { + retbleed_nosmt = true; + } else { +@@ -861,11 +866,13 @@ static int __init retbleed_parse_cmdline(char *str) + early_param("retbleed", retbleed_parse_cmdline); + + #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +-#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler!\n" ++#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n" + #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + + static void __init retbleed_select_mitigation(void) + { ++ bool mitigate_smt = false; ++ + if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off()) + return; + +@@ -877,11 +884,21 @@ static void __init retbleed_select_mitigation(void) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + break; + ++ case RETBLEED_CMD_IBPB: ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ break; ++ + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || +- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) +- retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { ++ ++ if (IS_ENABLED(CONFIG_RETPOLINE) && ++ IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) ++ retbleed_mitigation = 
RETBLEED_MITIGATION_UNRET; ++ else ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ } + + /* + * The Intel mitigation (IBRS) was already selected in +@@ -897,26 +914,34 @@ static void __init retbleed_select_mitigation(void) + if (!IS_ENABLED(CONFIG_RETPOLINE) || + !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { + pr_err(RETBLEED_COMPILER_MSG); +- retbleed_mitigation = RETBLEED_MITIGATION_NONE; +- break; ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ goto retbleed_force_ibpb; + } + + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + +- if (!boot_cpu_has(X86_FEATURE_STIBP) && +- (retbleed_nosmt || cpu_mitigations_auto_nosmt())) +- cpu_smt_disable(false); +- + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + pr_err(RETBLEED_UNTRAIN_MSG); ++ ++ mitigate_smt = true; ++ break; ++ ++ case RETBLEED_MITIGATION_IBPB: ++retbleed_force_ibpb: ++ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); ++ mitigate_smt = true; + break; + + default: + break; + } + ++ if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) && ++ (retbleed_nosmt || cpu_mitigations_auto_nosmt())) ++ cpu_smt_disable(false); ++ + /* + * Let IBRS trump all on Intel without affecting the effects of the + * retbleed= cmdline option. 
diff --git a/debian/patches/bugfix/x86/retbleed/0032-x86-bugs-Do-IBPB-fallback-check-only-once.patch b/debian/patches/bugfix/x86/retbleed/0032-x86-bugs-Do-IBPB-fallback-check-only-once.patch new file mode 100644 index 000000000..00cfa87fa --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0032-x86-bugs-Do-IBPB-fallback-check-only-once.patch @@ -0,0 +1,44 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Tue, 14 Jun 2022 15:07:19 -0700 +Subject: x86/bugs: Do IBPB fallback check only once +Origin: https://git.kernel.org/linus/0fe4aeea9c01baabecc8c3afc7889c809d939bc2 + +When booting with retbleed=auto, if the kernel wasn't built with +CONFIG_CC_HAS_RETURN_THUNK, the mitigation falls back to IBPB. Make +sure a warning is printed in that case. The IBPB fallback check is done +twice, but it really only needs to be done once. + +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kernel/cpu/bugs.c | 15 +++++---------- + 1 file changed, 5 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 04077d13b3ae..108bd74289c5 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -891,18 +891,13 @@ static void __init retbleed_select_mitigation(void) + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || +- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { +- +- if (IS_ENABLED(CONFIG_RETPOLINE) && +- IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) +- retbleed_mitigation = RETBLEED_MITIGATION_UNRET; +- else +- retbleed_mitigation = RETBLEED_MITIGATION_IBPB; +- } ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; + + /* +- * The Intel mitigation (IBRS) was already selected in +- * spectre_v2_select_mitigation(). ++ * The Intel mitigation (IBRS or eIBRS) was already selected in ++ * spectre_v2_select_mitigation(). 
'retbleed_mitigation' will ++ * be set accordingly below. + */ + + break; diff --git a/debian/patches/bugfix/x86/retbleed/0033-objtool-Add-entry-UNRET-validation.patch b/debian/patches/bugfix/x86/retbleed/0033-objtool-Add-entry-UNRET-validation.patch new file mode 100644 index 000000000..b78b006dc --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0033-objtool-Add-entry-UNRET-validation.patch @@ -0,0 +1,559 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:16:03 +0200 +Subject: objtool: Add entry UNRET validation +Origin: https://git.kernel.org/linus/a09a6e2399ba0595c3042b3164f3ca68a3cff33e + +Since entry asm is tricky, add a validation pass that ensures the +retbleed mitigation has been done before the first actual RET +instruction. + +Entry points are those that either have UNWIND_HINT_ENTRY, which acts +as UNWIND_HINT_EMPTY but marks the instruction as an entry point, or +those that have UWIND_HINT_IRET_REGS at +0. + +This is basically a variant of validate_branch() that is +intra-function and it will simply follow all branches from marked +entry points and ensures that all paths lead to ANNOTATE_UNRET_END. + +If a path hits RET or an indirection the path is a fail and will be +reported. 
+ +There are 3 ANNOTATE_UNRET_END instances: + + - UNTRAIN_RET itself + - exception from-kernel; this path doesn't need UNTRAIN_RET + - all early exceptions; these also don't need UNTRAIN_RET + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/entry/entry_64.S | 3 +- + arch/x86/entry/entry_64_compat.S | 6 +- + arch/x86/include/asm/nospec-branch.h | 12 ++ + arch/x86/include/asm/unwind_hints.h | 4 + + arch/x86/kernel/head_64.S | 5 + + arch/x86/xen/xen-asm.S | 10 +- + include/linux/objtool.h | 3 + + scripts/Makefile.vmlinux_o | 2 +- + tools/include/linux/objtool.h | 3 + + tools/objtool/builtin-check.c | 6 + + tools/objtool/check.c | 177 ++++++++++++++++++++++-- + tools/objtool/include/objtool/builtin.h | 1 + + tools/objtool/include/objtool/check.h | 11 +- + 13 files changed, 222 insertions(+), 21 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 0c88802e1155..65e3b8b7cbe5 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -85,7 +85,7 @@ + */ + + SYM_CODE_START(entry_SYSCALL_64) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + + swapgs +@@ -1095,6 +1095,7 @@ SYM_CODE_START_LOCAL(error_entry) + .Lerror_entry_done_lfence: + FENCE_SWAPGS_KERNEL_ENTRY + leaq 8(%rsp), %rax /* return pt_regs pointer */ ++ ANNOTATE_UNRET_END + RET + + .Lbstep_iret: +diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S +index bcb89d23ac0e..682338e7e2a3 100644 +--- a/arch/x86/entry/entry_64_compat.S ++++ b/arch/x86/entry/entry_64_compat.S +@@ -49,7 +49,7 @@ + * 0(%ebp) arg6 + */ + SYM_CODE_START(entry_SYSENTER_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + /* Interrupts are off on entry. 
*/ + swapgs +@@ -179,7 +179,7 @@ SYM_CODE_END(entry_SYSENTER_compat) + * 0(%esp) arg6 + */ + SYM_CODE_START(entry_SYSCALL_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + /* Interrupts are off on entry. */ + swapgs +@@ -305,7 +305,7 @@ SYM_CODE_END(entry_SYSCALL_compat) + * ebp arg6 + */ + SYM_CODE_START(entry_INT80_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + /* + * Interrupts are off on entry. +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 05dd75478d7b..bba42bd78edf 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -81,6 +81,17 @@ + */ + #define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE + ++/* ++ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should ++ * eventually turn into it's own annotation. ++ */ ++.macro ANNOTATE_UNRET_END ++#ifdef CONFIG_DEBUG_ENTRY ++ ANNOTATE_RETPOLINE_SAFE ++ nop ++#endif ++.endm ++ + /* + * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple + * indirect jmp/call which may be susceptible to the Spectre variant 2 +@@ -131,6 +142,7 @@ + */ + .macro UNTRAIN_RET + #ifdef CONFIG_RETPOLINE ++ ANNOTATE_UNRET_END + ALTERNATIVE_2 "", \ + "call zen_untrain_ret", X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB +diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h +index 8b33674288ea..6f70fe4c93f2 100644 +--- a/arch/x86/include/asm/unwind_hints.h ++++ b/arch/x86/include/asm/unwind_hints.h +@@ -11,6 +11,10 @@ + UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 + .endm + ++.macro UNWIND_HINT_ENTRY ++ UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1 ++.endm ++ + .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 + .if \base == %rsp + .if \indirect +diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S +index 92c4afa2b729..d860d437631b 100644 +--- 
a/arch/x86/kernel/head_64.S ++++ b/arch/x86/kernel/head_64.S +@@ -389,6 +389,8 @@ SYM_CODE_START_NOALIGN(vc_boot_ghcb) + UNWIND_HINT_IRET_REGS offset=8 + ENDBR + ++ ANNOTATE_UNRET_END ++ + /* Build pt_regs */ + PUSH_AND_CLEAR_REGS + +@@ -448,6 +450,7 @@ SYM_CODE_END(early_idt_handler_array) + + SYM_CODE_START_LOCAL(early_idt_handler_common) + UNWIND_HINT_IRET_REGS offset=16 ++ ANNOTATE_UNRET_END + /* + * The stack is the hardware frame, an error code or zero, and the + * vector number. +@@ -497,6 +500,8 @@ SYM_CODE_START_NOALIGN(vc_no_ghcb) + UNWIND_HINT_IRET_REGS offset=8 + ENDBR + ++ ANNOTATE_UNRET_END ++ + /* Build pt_regs */ + PUSH_AND_CLEAR_REGS + +diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S +index 6bf9d45b9178..6b4fdf6b9542 100644 +--- a/arch/x86/xen/xen-asm.S ++++ b/arch/x86/xen/xen-asm.S +@@ -121,7 +121,7 @@ SYM_FUNC_END(xen_read_cr2_direct); + + .macro xen_pv_trap name + SYM_CODE_START(xen_\name) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + pop %rcx + pop %r11 +@@ -235,7 +235,7 @@ SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) + + /* Normal 64-bit system call target */ + SYM_CODE_START(xen_entry_SYSCALL_64) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + popq %rcx + popq %r11 +@@ -255,7 +255,7 @@ SYM_CODE_END(xen_entry_SYSCALL_64) + + /* 32-bit compat syscall target */ + SYM_CODE_START(xen_entry_SYSCALL_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + popq %rcx + popq %r11 +@@ -273,7 +273,7 @@ SYM_CODE_END(xen_entry_SYSCALL_compat) + + /* 32-bit compat sysenter target */ + SYM_CODE_START(xen_entry_SYSENTER_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + /* + * NB: Xen is polite and clears TF from EFLAGS for us. 
This means +@@ -297,7 +297,7 @@ SYM_CODE_END(xen_entry_SYSENTER_compat) + + SYM_CODE_START(xen_entry_SYSCALL_compat) + SYM_CODE_START(xen_entry_SYSENTER_compat) +- UNWIND_HINT_EMPTY ++ UNWIND_HINT_ENTRY + ENDBR + lea 16(%rsp), %rsp /* strip %rcx, %r11 */ + mov $-ENOSYS, %rax +diff --git a/include/linux/objtool.h b/include/linux/objtool.h +index 15b940ec1eac..b026f1ae39c6 100644 +--- a/include/linux/objtool.h ++++ b/include/linux/objtool.h +@@ -32,11 +32,14 @@ struct unwind_hint { + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. ++ * ++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. + */ + #define UNWIND_HINT_TYPE_CALL 0 + #define UNWIND_HINT_TYPE_REGS 1 + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 ++#define UNWIND_HINT_TYPE_ENTRY 4 + + #ifdef CONFIG_OBJTOOL + +diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o +index 3c97a1564947..bc67748044a6 100644 +--- a/scripts/Makefile.vmlinux_o ++++ b/scripts/Makefile.vmlinux_o +@@ -44,7 +44,7 @@ objtool-enabled := $(or $(delay-objtool),$(CONFIG_NOINSTR_VALIDATION)) + + objtool_args := \ + $(if $(delay-objtool),$(objtool_args)) \ +- $(if $(CONFIG_NOINSTR_VALIDATION), --noinstr) \ ++ $(if $(CONFIG_NOINSTR_VALIDATION), --noinstr $(if $(CONFIG_RETPOLINE), --unret)) \ + $(if $(CONFIG_GCOV_KERNEL), --no-unreachable) \ + --link + +diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h +index 15b940ec1eac..b026f1ae39c6 100644 +--- a/tools/include/linux/objtool.h ++++ b/tools/include/linux/objtool.h +@@ -32,11 +32,14 @@ struct unwind_hint { + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. ++ * ++ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc. 
+ */ + #define UNWIND_HINT_TYPE_CALL 0 + #define UNWIND_HINT_TYPE_REGS 1 + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 ++#define UNWIND_HINT_TYPE_ENTRY 4 + + #ifdef CONFIG_OBJTOOL + +diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c +index f4c3a5091737..c063e1ff96b2 100644 +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -68,6 +68,7 @@ const struct option check_options[] = { + OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"), + OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"), + OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"), ++ OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"), + OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"), + OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"), + OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"), +@@ -163,6 +164,11 @@ static bool link_opts_valid(struct objtool_file *file) + return false; + } + ++ if (opts.unret) { ++ ERROR("--unret requires --link"); ++ return false; ++ } ++ + return true; + } + +diff --git a/tools/objtool/check.c b/tools/objtool/check.c +index 7dc378156a63..822a490e6d87 100644 +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -2032,16 +2032,24 @@ static int read_unwind_hints(struct objtool_file *file) + + insn->hint = true; + +- if (opts.ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { ++ if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { + struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); + +- if (sym && sym->bind == STB_GLOBAL && +- insn->type != INSN_ENDBR && !insn->noendbr) { +- WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR", +- insn->sec, insn->offset); ++ if (sym && sym->bind == STB_GLOBAL) { ++ if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) { ++ WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR", 
++ insn->sec, insn->offset); ++ } ++ ++ insn->entry = 1; + } + } + ++ if (hint->type == UNWIND_HINT_TYPE_ENTRY) { ++ hint->type = UNWIND_HINT_TYPE_CALL; ++ insn->entry = 1; ++ } ++ + if (hint->type == UNWIND_HINT_TYPE_FUNC) { + insn->cfi = &func_cfi; + continue; +@@ -2116,8 +2124,9 @@ static int read_retpoline_hints(struct objtool_file *file) + + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC && +- insn->type != INSN_RETURN) { +- WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret", ++ insn->type != INSN_RETURN && ++ insn->type != INSN_NOP) { ++ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop", + insn->sec, insn->offset); + return -1; + } +@@ -3305,8 +3314,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, + return 1; + } + +- visited = 1 << state.uaccess; +- if (insn->visited) { ++ visited = VISITED_BRANCH << state.uaccess; ++ if (insn->visited & VISITED_BRANCH_MASK) { + if (!insn->hint && !insn_cfi_match(insn, &state.cfi)) + return 1; + +@@ -3520,6 +3529,145 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec) + return warnings; + } + ++/* ++ * Validate rethunk entry constraint: must untrain RET before the first RET. ++ * ++ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes ++ * before an actual RET instruction. 
++ */ ++static int validate_entry(struct objtool_file *file, struct instruction *insn) ++{ ++ struct instruction *next, *dest; ++ int ret, warnings = 0; ++ ++ for (;;) { ++ next = next_insn_to_validate(file, insn); ++ ++ if (insn->visited & VISITED_ENTRY) ++ return 0; ++ ++ insn->visited |= VISITED_ENTRY; ++ ++ if (!insn->ignore_alts && !list_empty(&insn->alts)) { ++ struct alternative *alt; ++ bool skip_orig = false; ++ ++ list_for_each_entry(alt, &insn->alts, list) { ++ if (alt->skip_orig) ++ skip_orig = true; ++ ++ ret = validate_entry(file, alt->insn); ++ if (ret) { ++ if (opts.backtrace) ++ BT_FUNC("(alt)", insn); ++ return ret; ++ } ++ } ++ ++ if (skip_orig) ++ return 0; ++ } ++ ++ switch (insn->type) { ++ ++ case INSN_CALL_DYNAMIC: ++ case INSN_JUMP_DYNAMIC: ++ case INSN_JUMP_DYNAMIC_CONDITIONAL: ++ WARN_FUNC("early indirect call", insn->sec, insn->offset); ++ return 1; ++ ++ case INSN_JUMP_UNCONDITIONAL: ++ case INSN_JUMP_CONDITIONAL: ++ if (!is_sibling_call(insn)) { ++ if (!insn->jump_dest) { ++ WARN_FUNC("unresolved jump target after linking?!?", ++ insn->sec, insn->offset); ++ return -1; ++ } ++ ret = validate_entry(file, insn->jump_dest); ++ if (ret) { ++ if (opts.backtrace) { ++ BT_FUNC("(branch%s)", insn, ++ insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : ""); ++ } ++ return ret; ++ } ++ ++ if (insn->type == INSN_JUMP_UNCONDITIONAL) ++ return 0; ++ ++ break; ++ } ++ ++ /* fallthrough */ ++ case INSN_CALL: ++ dest = find_insn(file, insn->call_dest->sec, ++ insn->call_dest->offset); ++ if (!dest) { ++ WARN("Unresolved function after linking!?: %s", ++ insn->call_dest->name); ++ return -1; ++ } ++ ++ ret = validate_entry(file, dest); ++ if (ret) { ++ if (opts.backtrace) ++ BT_FUNC("(call)", insn); ++ return ret; ++ } ++ /* ++ * If a call returns without error, it must have seen UNTRAIN_RET. ++ * Therefore any non-error return is a success. 
++ */ ++ return 0; ++ ++ case INSN_RETURN: ++ WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset); ++ return 1; ++ ++ case INSN_NOP: ++ if (insn->retpoline_safe) ++ return 0; ++ break; ++ ++ default: ++ break; ++ } ++ ++ if (!next) { ++ WARN_FUNC("teh end!", insn->sec, insn->offset); ++ return -1; ++ } ++ insn = next; ++ } ++ ++ return warnings; ++} ++ ++/* ++ * Validate that all branches starting at 'insn->entry' encounter UNRET_END ++ * before RET. ++ */ ++static int validate_unret(struct objtool_file *file) ++{ ++ struct instruction *insn; ++ int ret, warnings = 0; ++ ++ for_each_insn(file, insn) { ++ if (!insn->entry) ++ continue; ++ ++ ret = validate_entry(file, insn); ++ if (ret < 0) { ++ WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset); ++ return ret; ++ } ++ warnings += ret; ++ } ++ ++ return warnings; ++} ++ + static int validate_retpoline(struct objtool_file *file) + { + struct instruction *insn; +@@ -4039,6 +4187,17 @@ int check(struct objtool_file *file) + warnings += ret; + } + ++ if (opts.unret) { ++ /* ++ * Must be after validate_branch() and friends, it plays ++ * further games with insn->visited. 
++ */ ++ ret = validate_unret(file); ++ if (ret < 0) ++ return ret; ++ warnings += ret; ++ } ++ + if (opts.ibt) { + ret = validate_ibt(file); + if (ret < 0) +diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h +index 280ea18b7f2b..0c476b0b40a3 100644 +--- a/tools/objtool/include/objtool/builtin.h ++++ b/tools/objtool/include/objtool/builtin.h +@@ -19,6 +19,7 @@ struct opts { + bool noinstr; + bool orc; + bool retpoline; ++ bool unret; + bool sls; + bool stackval; + bool static_call; +diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h +index f10d7374f388..0eeedeacbefb 100644 +--- a/tools/objtool/include/objtool/check.h ++++ b/tools/objtool/include/objtool/check.h +@@ -51,8 +51,10 @@ struct instruction { + ignore_alts : 1, + hint : 1, + retpoline_safe : 1, +- noendbr : 1; +- /* 2 bit hole */ ++ noendbr : 1, ++ entry : 1; ++ /* 1 bit hole */ ++ + s8 instr; + u8 visited; + /* u8 hole */ +@@ -69,6 +71,11 @@ struct instruction { + struct cfi_state *cfi; + }; + ++#define VISITED_BRANCH 0x01 ++#define VISITED_BRANCH_UACCESS 0x02 ++#define VISITED_BRANCH_MASK 0x03 ++#define VISITED_ENTRY 0x04 ++ + static inline bool is_static_jump(struct instruction *insn) + { + return insn->type == INSN_JUMP_CONDITIONAL || diff --git a/debian/patches/bugfix/x86/retbleed/0034-x86-cpu-amd-Add-Spectral-Chicken.patch b/debian/patches/bugfix/x86/retbleed/0034-x86-cpu-amd-Add-Spectral-Chicken.patch new file mode 100644 index 000000000..a1c73d5c9 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0034-x86-cpu-amd-Add-Spectral-Chicken.patch @@ -0,0 +1,109 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Tue, 14 Jun 2022 23:16:04 +0200 +Subject: x86/cpu/amd: Add Spectral Chicken +Origin: https://git.kernel.org/linus/d7caac991feeef1b871ee6988fd2c9725df09039 + +Zen2 uarchs have an undocumented, unnamed, MSR that contains a chicken +bit for some speculation behaviour. It needs setting. 
+ +Note: very belatedly AMD released naming; it's now officially called + MSR_AMD64_DE_CFG2 and MSR_AMD64_DE_CFG2_SUPPRESS_NOBR_PRED_BIT + but shall remain the SPECTRAL CHICKEN. + +Suggested-by: Andrew Cooper <Andrew.Cooper3@citrix.com> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/msr-index.h | 3 +++ + arch/x86/kernel/cpu/amd.c | 23 ++++++++++++++++++++++- + arch/x86/kernel/cpu/cpu.h | 2 ++ + arch/x86/kernel/cpu/hygon.c | 6 ++++++ + 4 files changed, 33 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 059c7e500ab4..7b411d9a6efb 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -568,6 +568,9 @@ + /* Fam 17h MSRs */ + #define MSR_F17H_IRPERF 0xc00000e9 + ++#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3 ++#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1) ++ + /* Fam 16h MSRs */ + #define MSR_F16H_L2I_PERF_CTL 0xc0010230 + #define MSR_F16H_L2I_PERF_CTR 0xc0010231 +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 0c0b09796ced..8cf0659c0521 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -862,6 +862,26 @@ static void init_amd_bd(struct cpuinfo_x86 *c) + clear_rdrand_cpuid_bit(c); + } + ++void init_spectral_chicken(struct cpuinfo_x86 *c) ++{ ++ u64 value; ++ ++ /* ++ * On Zen2 we offer this chicken (bit) on the altar of Speculation. ++ * ++ * This suppresses speculation from the middle of a basic block, i.e. it ++ * suppresses non-branch predictions. 
++ * ++ * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H ++ */ ++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) { ++ if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) { ++ value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT; ++ wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); ++ } ++ } ++} ++ + static void init_amd_zn(struct cpuinfo_x86 *c) + { + set_cpu_cap(c, X86_FEATURE_ZEN); +@@ -907,7 +927,8 @@ static void init_amd(struct cpuinfo_x86 *c) + case 0x12: init_amd_ln(c); break; + case 0x15: init_amd_bd(c); break; + case 0x16: init_amd_jg(c); break; +- case 0x17: fallthrough; ++ case 0x17: init_spectral_chicken(c); ++ fallthrough; + case 0x19: init_amd_zn(c); break; + } + +diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h +index 2a8e584fc991..7c9b5893c30a 100644 +--- a/arch/x86/kernel/cpu/cpu.h ++++ b/arch/x86/kernel/cpu/cpu.h +@@ -61,6 +61,8 @@ static inline void tsx_init(void) { } + static inline void tsx_ap_init(void) { } + #endif /* CONFIG_CPU_SUP_INTEL */ + ++extern void init_spectral_chicken(struct cpuinfo_x86 *c); ++ + extern void get_cpu_cap(struct cpuinfo_x86 *c); + extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); + extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c +index 3fcdda4c1e11..21fd425088fe 100644 +--- a/arch/x86/kernel/cpu/hygon.c ++++ b/arch/x86/kernel/cpu/hygon.c +@@ -302,6 +302,12 @@ static void init_hygon(struct cpuinfo_x86 *c) + /* get apicid instead of initial apic id from cpuid */ + c->apicid = hard_smp_processor_id(); + ++ /* ++ * XXX someone from Hygon needs to confirm this DTRT ++ * ++ init_spectral_chicken(c); ++ */ ++ + set_cpu_cap(c, X86_FEATURE_ZEN); + set_cpu_cap(c, X86_FEATURE_CPB); + diff --git a/debian/patches/bugfix/x86/retbleed/0035-x86-speculation-Fix-RSB-filling-with-CONFIG_RETPOLIN.patch 
b/debian/patches/bugfix/x86/retbleed/0035-x86-speculation-Fix-RSB-filling-with-CONFIG_RETPOLIN.patch new file mode 100644 index 000000000..a81fefcec --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0035-x86-speculation-Fix-RSB-filling-with-CONFIG_RETPOLIN.patch @@ -0,0 +1,77 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Tue, 14 Jun 2022 23:16:05 +0200 +Subject: x86/speculation: Fix RSB filling with CONFIG_RETPOLINE=n +Origin: https://git.kernel.org/linus/b2620facef4889fefcbf2e87284f34dcd4189bce + +If a kernel is built with CONFIG_RETPOLINE=n, but the user still wants +to mitigate Spectre v2 using IBRS or eIBRS, the RSB filling will be +silently disabled. + +There's nothing retpoline-specific about RSB buffer filling. Remove the +CONFIG_RETPOLINE guards around it. + +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/entry/entry_32.S | 2 -- + arch/x86/entry/entry_64.S | 2 -- + arch/x86/include/asm/nospec-branch.h | 2 -- + 3 files changed, 6 deletions(-) + +diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S +index 887420844066..e309e7156038 100644 +--- a/arch/x86/entry/entry_32.S ++++ b/arch/x86/entry/entry_32.S +@@ -698,7 +698,6 @@ SYM_CODE_START(__switch_to_asm) + movl %ebx, PER_CPU_VAR(__stack_chk_guard) + #endif + +-#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated +@@ -707,7 +706,6 @@ SYM_CODE_START(__switch_to_asm) + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +-#endif + + /* Restore flags or the incoming task to restore AC state. 
*/ + popfl +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index 65e3b8b7cbe5..a4ba162e52c3 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -255,7 +255,6 @@ SYM_FUNC_START(__switch_to_asm) + movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset + #endif + +-#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated +@@ -264,7 +263,6 @@ SYM_FUNC_START(__switch_to_asm) + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +-#endif + + /* restore callee-saved registers */ + popq %r15 +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index bba42bd78edf..08b03c12e6c2 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -122,11 +122,9 @@ + * monstrosity above, manually. + */ + .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req +-#ifdef CONFIG_RETPOLINE + ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr + __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) + .Lskip_rsb_\@: +-#endif + .endm + + /* diff --git a/debian/patches/bugfix/x86/retbleed/0036-x86-speculation-Fix-firmware-entry-SPEC_CTRL-handlin.patch b/debian/patches/bugfix/x86/retbleed/0036-x86-speculation-Fix-firmware-entry-SPEC_CTRL-handlin.patch new file mode 100644 index 000000000..3942af530 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0036-x86-speculation-Fix-firmware-entry-SPEC_CTRL-handlin.patch @@ -0,0 +1,41 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Tue, 14 Jun 2022 23:16:06 +0200 +Subject: x86/speculation: Fix firmware entry SPEC_CTRL handling +Origin: https://git.kernel.org/linus/e6aa13622ea8283cc699cac5d018cc40a2ba2010 + +The firmware entry code may accidentally clear STIBP or SSBD. Fix that. 
+ +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/nospec-branch.h | 10 ++++------ + 1 file changed, 4 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index 08b03c12e6c2..dee9ef77af13 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -285,18 +285,16 @@ extern u64 spec_ctrl_current(void); + */ + #define firmware_restrict_branch_speculation_start() \ + do { \ +- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ +- \ + preempt_disable(); \ +- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ ++ spec_ctrl_current() | SPEC_CTRL_IBRS, \ + X86_FEATURE_USE_IBRS_FW); \ + } while (0) + + #define firmware_restrict_branch_speculation_end() \ + do { \ +- u64 val = x86_spec_ctrl_base; \ +- \ +- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ ++ alternative_msr_write(MSR_IA32_SPEC_CTRL, \ ++ spec_ctrl_current(), \ + X86_FEATURE_USE_IBRS_FW); \ + preempt_enable(); \ + } while (0) diff --git a/debian/patches/bugfix/x86/retbleed/0037-x86-speculation-Fix-SPEC_CTRL-write-on-SMT-state-cha.patch b/debian/patches/bugfix/x86/retbleed/0037-x86-speculation-Fix-SPEC_CTRL-write-on-SMT-state-cha.patch new file mode 100644 index 000000000..c300039da --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0037-x86-speculation-Fix-SPEC_CTRL-write-on-SMT-state-cha.patch @@ -0,0 +1,29 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Tue, 14 Jun 2022 23:16:07 +0200 +Subject: x86/speculation: Fix SPEC_CTRL write on SMT state change +Origin: https://git.kernel.org/linus/56aa4d221f1ee2c3a49b45b800778ec6e0ab73c5 + +If the SMT state changes, SSBD might get accidentally disabled. Fix +that. 
+ +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kernel/cpu/bugs.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 108bd74289c5..9b80059c4e02 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1458,7 +1458,8 @@ static void __init spectre_v2_select_mitigation(void) + + static void update_stibp_msr(void * __unused) + { +- write_spec_ctrl_current(x86_spec_ctrl_base, true); ++ u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); ++ write_spec_ctrl_current(val, true); + } + + /* Update x86_spec_ctrl_base in case SMT state changed. */ diff --git a/debian/patches/bugfix/x86/retbleed/0038-x86-speculation-Use-cached-host-SPEC_CTRL-value-for-.patch b/debian/patches/bugfix/x86/retbleed/0038-x86-speculation-Use-cached-host-SPEC_CTRL-value-for-.patch new file mode 100644 index 000000000..bee1127b1 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0038-x86-speculation-Use-cached-host-SPEC_CTRL-value-for-.patch @@ -0,0 +1,52 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Tue, 14 Jun 2022 23:16:08 +0200 +Subject: x86/speculation: Use cached host SPEC_CTRL value for guest entry/exit +Origin: https://git.kernel.org/linus/bbb69e8bee1bd882784947095ffb2bfe0f7c9470 + +There's no need to recalculate the host value for every entry/exit. +Just use the cached value in spec_ctrl_current(). 
+ +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kernel/cpu/bugs.c | 12 +----------- + 1 file changed, 1 insertion(+), 11 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 9b80059c4e02..f884f9497666 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -208,7 +208,7 @@ void __init check_bugs(void) + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +- u64 msrval, guestval, hostval = x86_spec_ctrl_base; ++ u64 msrval, guestval, hostval = spec_ctrl_current(); + struct thread_info *ti = current_thread_info(); + + /* Is MSR_SPEC_CTRL implemented ? */ +@@ -221,15 +221,6 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + guestval = hostval & ~x86_spec_ctrl_mask; + guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; + +- /* SSBD controlled in MSR_SPEC_CTRL */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || +- static_cpu_has(X86_FEATURE_AMD_SSBD)) +- hostval |= ssbd_tif_to_spec_ctrl(ti->flags); +- +- /* Conditional STIBP enabled? */ +- if (static_branch_unlikely(&switch_to_cond_stibp)) +- hostval |= stibp_tif_to_spec_ctrl(ti->flags); +- + if (hostval != guestval) { + msrval = setguest ? 
guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); +@@ -1397,7 +1388,6 @@ static void __init spectre_v2_select_mitigation(void) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + + if (spectre_v2_in_ibrs_mode(mode)) { +- /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + write_spec_ctrl_current(x86_spec_ctrl_base, true); + } diff --git a/debian/patches/bugfix/x86/retbleed/0039-x86-speculation-Remove-x86_spec_ctrl_mask.patch b/debian/patches/bugfix/x86/retbleed/0039-x86-speculation-Remove-x86_spec_ctrl_mask.patch new file mode 100644 index 000000000..d5cb8f449 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0039-x86-speculation-Remove-x86_spec_ctrl_mask.patch @@ -0,0 +1,83 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Fri, 17 Jun 2022 12:12:48 -0700 +Subject: x86/speculation: Remove x86_spec_ctrl_mask +Origin: https://git.kernel.org/linus/acac5e98ef8d638a411cfa2ee676c87e1973f126 + +This mask has been made redundant by kvm_spec_ctrl_test_value(). And it +doesn't even work when MSR interception is disabled, as the guest can +just write to SPEC_CTRL directly. + +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kernel/cpu/bugs.c | 31 +------------------------------ + 1 file changed, 1 insertion(+), 30 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index f884f9497666..efff8b9f1bbd 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -85,12 +85,6 @@ u64 spec_ctrl_current(void) + } + EXPORT_SYMBOL_GPL(spec_ctrl_current); + +-/* +- * The vendor and possibly platform specific bits which can be modified in +- * x86_spec_ctrl_base. +- */ +-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; +- + /* + * AMD specific MSR info for Speculative Store Bypass control. 
+ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). +@@ -146,10 +140,6 @@ void __init check_bugs(void) + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + +- /* Allow STIBP in MSR_SPEC_CTRL if supported */ +- if (boot_cpu_has(X86_FEATURE_STIBP)) +- x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; +- + /* Select the proper CPU mitigations before patching alternatives: */ + spectre_v1_select_mitigation(); + spectre_v2_select_mitigation(); +@@ -208,19 +198,10 @@ void __init check_bugs(void) + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +- u64 msrval, guestval, hostval = spec_ctrl_current(); ++ u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current(); + struct thread_info *ti = current_thread_info(); + +- /* Is MSR_SPEC_CTRL implemented ? */ + if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { +- /* +- * Restrict guest_spec_ctrl to supported values. Clear the +- * modifiable bits in the host base value and or the +- * modifiable bits from the guest value. +- */ +- guestval = hostval & ~x86_spec_ctrl_mask; +- guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; +- + if (hostval != guestval) { + msrval = setguest ? guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); +@@ -1665,16 +1646,6 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) + break; + } + +- /* +- * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper +- * bit in the mask to allow guests to use the mitigation even in the +- * case where the host does not enable it. +- */ +- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || +- static_cpu_has(X86_FEATURE_AMD_SSBD)) { +- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; +- } +- + /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. 
diff --git a/debian/patches/bugfix/x86/retbleed/0040-objtool-Re-add-UNWIND_HINT_-SAVE_RESTORE.patch b/debian/patches/bugfix/x86/retbleed/0040-objtool-Re-add-UNWIND_HINT_-SAVE_RESTORE.patch new file mode 100644 index 000000000..b24134fe5 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0040-objtool-Re-add-UNWIND_HINT_-SAVE_RESTORE.patch @@ -0,0 +1,209 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Fri, 24 Jun 2022 12:52:40 +0200 +Subject: objtool: Re-add UNWIND_HINT_{SAVE_RESTORE} +Origin: https://git.kernel.org/linus/8faea26e611189e933ea2281975ff4dc7c1106b6 + +Commit + + c536ed2fffd5 ("objtool: Remove SAVE/RESTORE hints") + +removed the save/restore unwind hints because they were no longer +needed. Now they're going to be needed again so re-add them. + +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/unwind_hints.h | 12 ++++++-- + include/linux/objtool.h | 6 ++-- + tools/include/linux/objtool.h | 6 ++-- + tools/objtool/check.c | 40 +++++++++++++++++++++++++++ + tools/objtool/include/objtool/check.h | 19 +++++++------ + 5 files changed, 68 insertions(+), 15 deletions(-) + +diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h +index 6f70fe4c93f2..f66fbe6537dd 100644 +--- a/arch/x86/include/asm/unwind_hints.h ++++ b/arch/x86/include/asm/unwind_hints.h +@@ -8,11 +8,11 @@ + #ifdef __ASSEMBLY__ + + .macro UNWIND_HINT_EMPTY +- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1 ++ UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1 + .endm + + .macro UNWIND_HINT_ENTRY +- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_ENTRY end=1 ++ UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1 + .endm + + .macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0 +@@ -56,6 +56,14 @@ + UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC + .endm + ++.macro 
UNWIND_HINT_SAVE ++ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE ++.endm ++ ++.macro UNWIND_HINT_RESTORE ++ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE ++.endm ++ + #else + + #define UNWIND_HINT_FUNC \ +diff --git a/include/linux/objtool.h b/include/linux/objtool.h +index b026f1ae39c6..10bc88cc3bf6 100644 +--- a/include/linux/objtool.h ++++ b/include/linux/objtool.h +@@ -40,6 +40,8 @@ struct unwind_hint { + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 + #define UNWIND_HINT_TYPE_ENTRY 4 ++#define UNWIND_HINT_TYPE_SAVE 5 ++#define UNWIND_HINT_TYPE_RESTORE 6 + + #ifdef CONFIG_OBJTOOL + +@@ -127,7 +129,7 @@ struct unwind_hint { + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. + */ +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ +@@ -180,7 +182,7 @@ struct unwind_hint { + #define ASM_REACHABLE + #else + #define ANNOTATE_INTRA_FUNCTION_CALL +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .endm + .macro STACK_FRAME_NON_STANDARD func:req + .endm +diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h +index b026f1ae39c6..10bc88cc3bf6 100644 +--- a/tools/include/linux/objtool.h ++++ b/tools/include/linux/objtool.h +@@ -40,6 +40,8 @@ struct unwind_hint { + #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 + #define UNWIND_HINT_TYPE_FUNC 3 + #define UNWIND_HINT_TYPE_ENTRY 4 ++#define UNWIND_HINT_TYPE_SAVE 5 ++#define UNWIND_HINT_TYPE_RESTORE 6 + + #ifdef CONFIG_OBJTOOL + +@@ -127,7 +129,7 @@ struct unwind_hint { + * the debuginfo as necessary. It will also warn if it sees any + * inconsistencies. 
+ */ +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .Lunwind_hint_ip_\@: + .pushsection .discard.unwind_hints + /* struct unwind_hint */ +@@ -180,7 +182,7 @@ struct unwind_hint { + #define ASM_REACHABLE + #else + #define ANNOTATE_INTRA_FUNCTION_CALL +-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 ++.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0 + .endm + .macro STACK_FRAME_NON_STANDARD func:req + .endm +diff --git a/tools/objtool/check.c b/tools/objtool/check.c +index 822a490e6d87..ddfdd138cc2a 100644 +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -2032,6 +2032,17 @@ static int read_unwind_hints(struct objtool_file *file) + + insn->hint = true; + ++ if (hint->type == UNWIND_HINT_TYPE_SAVE) { ++ insn->hint = false; ++ insn->save = true; ++ continue; ++ } ++ ++ if (hint->type == UNWIND_HINT_TYPE_RESTORE) { ++ insn->restore = true; ++ continue; ++ } ++ + if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) { + struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset); + +@@ -3329,6 +3340,35 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, + state.instr += insn->instr; + + if (insn->hint) { ++ if (insn->restore) { ++ struct instruction *save_insn, *i; ++ ++ i = insn; ++ save_insn = NULL; ++ ++ sym_for_each_insn_continue_reverse(file, func, i) { ++ if (i->save) { ++ save_insn = i; ++ break; ++ } ++ } ++ ++ if (!save_insn) { ++ WARN_FUNC("no corresponding CFI save for CFI restore", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ if (!save_insn->visited) { ++ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo", ++ sec, insn->offset); ++ return 1; ++ } ++ ++ insn->cfi = save_insn->cfi; ++ nr_cfi_reused++; ++ } ++ + state.cfi = *insn->cfi; + } else { + /* XXX track if we actually changed state.cfi */ +diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h +index 
0eeedeacbefb..036129cebeee 100644 +--- a/tools/objtool/include/objtool/check.h ++++ b/tools/objtool/include/objtool/check.h +@@ -46,18 +46,19 @@ struct instruction { + enum insn_type type; + unsigned long immediate; + +- u8 dead_end : 1, +- ignore : 1, +- ignore_alts : 1, +- hint : 1, +- retpoline_safe : 1, +- noendbr : 1, +- entry : 1; +- /* 1 bit hole */ ++ u16 dead_end : 1, ++ ignore : 1, ++ ignore_alts : 1, ++ hint : 1, ++ save : 1, ++ restore : 1, ++ retpoline_safe : 1, ++ noendbr : 1, ++ entry : 1; ++ /* 7 bit hole */ + + s8 instr; + u8 visited; +- /* u8 hole */ + + struct alt_group *alt_group; + struct symbol *call_dest; diff --git a/debian/patches/bugfix/x86/retbleed/0041-KVM-VMX-Flatten-__vmx_vcpu_run.patch b/debian/patches/bugfix/x86/retbleed/0041-KVM-VMX-Flatten-__vmx_vcpu_run.patch new file mode 100644 index 000000000..f9d09b623 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0041-KVM-VMX-Flatten-__vmx_vcpu_run.patch @@ -0,0 +1,192 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Tue, 14 Jun 2022 23:16:11 +0200 +Subject: KVM: VMX: Flatten __vmx_vcpu_run() +Origin: https://git.kernel.org/linus/8bd200d23ec42d66ccd517a72dd0b9cc6132d2fd + +Move the vmx_vm{enter,exit}() functionality into __vmx_vcpu_run(). This +will make it easier to do the spec_ctrl handling before the first RET. + +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kvm/vmx/vmenter.S | 119 ++++++++++++++----------------------- + 1 file changed, 46 insertions(+), 73 deletions(-) + +diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S +index 435c187927c4..c83163fb2e9c 100644 +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -30,68 +30,6 @@ + + .section .noinstr.text, "ax" + +-/** +- * vmx_vmenter - VM-Enter the current loaded VMCS +- * +- * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. 
VMRESUME +- * +- * Returns: +- * %RFLAGS.CF is set on VM-Fail Invalid +- * %RFLAGS.ZF is set on VM-Fail Valid +- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit +- * +- * Note that VMRESUME/VMLAUNCH fall-through and return directly if +- * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump +- * to vmx_vmexit. +- */ +-SYM_FUNC_START_LOCAL(vmx_vmenter) +- /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */ +- je 2f +- +-1: vmresume +- RET +- +-2: vmlaunch +- RET +- +-3: cmpb $0, kvm_rebooting +- je 4f +- RET +-4: ud2 +- +- _ASM_EXTABLE(1b, 3b) +- _ASM_EXTABLE(2b, 3b) +- +-SYM_FUNC_END(vmx_vmenter) +- +-/** +- * vmx_vmexit - Handle a VMX VM-Exit +- * +- * Returns: +- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit +- * +- * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump +- * here after hardware loads the host's state, i.e. this is the destination +- * referred to by VMCS.HOST_RIP. +- */ +-SYM_FUNC_START(vmx_vmexit) +-#ifdef CONFIG_RETPOLINE +- ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE +- /* Preserve guest's RAX, it's used to stuff the RSB. */ +- push %_ASM_AX +- +- /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ +- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +- +- /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */ +- or $1, %_ASM_AX +- +- pop %_ASM_AX +-.Lvmexit_skip_rsb: +-#endif +- RET +-SYM_FUNC_END(vmx_vmexit) +- + /** + * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode + * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) +@@ -124,8 +62,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + /* Copy @launched to BL, _ASM_ARG3 is volatile. */ + mov %_ASM_ARG3B, %bl + +- /* Adjust RSP to account for the CALL to vmx_vmenter(). */ +- lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2 ++ lea (%_ASM_SP), %_ASM_ARG2 + call vmx_update_host_rsp + + /* Load @regs to RAX. 
*/ +@@ -154,11 +91,37 @@ SYM_FUNC_START(__vmx_vcpu_run) + /* Load guest RAX. This kills the @regs pointer! */ + mov VCPU_RAX(%_ASM_AX), %_ASM_AX + +- /* Enter guest mode */ +- call vmx_vmenter ++ /* Check EFLAGS.ZF from 'testb' above */ ++ je .Lvmlaunch ++ ++ /* ++ * After a successful VMRESUME/VMLAUNCH, control flow "magically" ++ * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting. ++ * So this isn't a typical function and objtool needs to be told to ++ * save the unwind state here and restore it below. ++ */ ++ UNWIND_HINT_SAVE ++ ++/* ++ * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at ++ * the 'vmx_vmexit' label below. ++ */ ++.Lvmresume: ++ vmresume ++ jmp .Lvmfail ++ ++.Lvmlaunch: ++ vmlaunch ++ jmp .Lvmfail ++ ++ _ASM_EXTABLE(.Lvmresume, .Lfixup) ++ _ASM_EXTABLE(.Lvmlaunch, .Lfixup) + +- /* Jump on VM-Fail. */ +- jbe 2f ++SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) ++ ++ /* Restore unwind state from before the VMRESUME/VMLAUNCH. */ ++ UNWIND_HINT_RESTORE ++ ENDBR + + /* Temporarily save guest's RAX. */ + push %_ASM_AX +@@ -185,9 +148,13 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov %r15, VCPU_R15(%_ASM_AX) + #endif + ++ /* IMPORTANT: RSB must be stuffed before the first return. */ ++ FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE ++ + /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ + xor %eax, %eax + ++.Lclear_regs: + /* + * Clear all general purpose registers except RSP and RAX to prevent + * speculative use of the guest's values, even those that are reloaded +@@ -197,7 +164,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + * free. RSP and RAX are exempt as RSP is restored by hardware during + * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. + */ +-1: xor %ecx, %ecx ++ xor %ecx, %ecx + xor %edx, %edx + xor %ebx, %ebx + xor %ebp, %ebp +@@ -216,8 +183,8 @@ SYM_FUNC_START(__vmx_vcpu_run) + + /* "POP" @regs. 
*/ + add $WORD_SIZE, %_ASM_SP +- pop %_ASM_BX + ++ pop %_ASM_BX + #ifdef CONFIG_X86_64 + pop %r12 + pop %r13 +@@ -230,9 +197,15 @@ SYM_FUNC_START(__vmx_vcpu_run) + pop %_ASM_BP + RET + +- /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */ +-2: mov $1, %eax +- jmp 1b ++.Lfixup: ++ cmpb $0, kvm_rebooting ++ jne .Lvmfail ++ ud2 ++.Lvmfail: ++ /* VM-Fail: set return value to 1 */ ++ mov $1, %eax ++ jmp .Lclear_regs ++ + SYM_FUNC_END(__vmx_vcpu_run) + + diff --git a/debian/patches/bugfix/x86/retbleed/0042-KVM-VMX-Convert-launched-argument-to-flags.patch b/debian/patches/bugfix/x86/retbleed/0042-KVM-VMX-Convert-launched-argument-to-flags.patch new file mode 100644 index 000000000..9e04918ec --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0042-KVM-VMX-Convert-launched-argument-to-flags.patch @@ -0,0 +1,175 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Tue, 14 Jun 2022 23:16:12 +0200 +Subject: KVM: VMX: Convert launched argument to flags +Origin: https://git.kernel.org/linus/bb06650634d3552c0f8557e9d16aa1a408040e28 + +Convert __vmx_vcpu_run()'s 'launched' argument to 'flags', in +preparation for doing SPEC_CTRL handling immediately after vmexit, which +will need another flag. + +This is much easier than adding a fourth argument, because this code +supports both 32-bit and 64-bit, and the fourth argument on 32-bit would +have to be pushed on the stack. + +Note that __vmx_vcpu_run_flags() is called outside of the noinstr +critical section because it will soon start calling potentially +traceable functions. 
+ +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kvm/vmx/nested.c | 2 +- + arch/x86/kvm/vmx/run_flags.h | 7 +++++++ + arch/x86/kvm/vmx/vmenter.S | 9 +++++---- + arch/x86/kvm/vmx/vmx.c | 17 ++++++++++++++--- + arch/x86/kvm/vmx/vmx.h | 5 ++++- + 5 files changed, 31 insertions(+), 9 deletions(-) + create mode 100644 arch/x86/kvm/vmx/run_flags.h + +diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c +index f5cb18e00e78..3a4e895269d7 100644 +--- a/arch/x86/kvm/vmx/nested.c ++++ b/arch/x86/kvm/vmx/nested.c +@@ -3087,7 +3087,7 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu) + } + + vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, +- vmx->loaded_vmcs->launched); ++ __vmx_vcpu_run_flags(vmx)); + + if (vmx->msr_autoload.host.nr) + vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); +diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h +new file mode 100644 +index 000000000000..57f4c664ea9c +--- /dev/null ++++ b/arch/x86/kvm/vmx/run_flags.h +@@ -0,0 +1,7 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++#ifndef __KVM_X86_VMX_RUN_FLAGS_H ++#define __KVM_X86_VMX_RUN_FLAGS_H ++ ++#define VMX_RUN_VMRESUME (1 << 0) ++ ++#endif /* __KVM_X86_VMX_RUN_FLAGS_H */ +diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S +index c83163fb2e9c..ddc3bf85db33 100644 +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -5,6 +5,7 @@ + #include <asm/kvm_vcpu_regs.h> + #include <asm/nospec-branch.h> + #include <asm/segment.h> ++#include "run_flags.h" + + #define WORD_SIZE (BITS_PER_LONG / 8) + +@@ -34,7 +35,7 @@ + * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode + * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) + * @regs: unsigned long * (to guest registers) +- * @launched: %true if the VMCS has been launched ++ * @flags: 
VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH + * + * Returns: + * 0 on VM-Exit, 1 on VM-Fail +@@ -59,7 +60,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + */ + push %_ASM_ARG2 + +- /* Copy @launched to BL, _ASM_ARG3 is volatile. */ ++ /* Copy @flags to BL, _ASM_ARG3 is volatile. */ + mov %_ASM_ARG3B, %bl + + lea (%_ASM_SP), %_ASM_ARG2 +@@ -69,7 +70,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov (%_ASM_SP), %_ASM_AX + + /* Check if vmlaunch or vmresume is needed */ +- testb %bl, %bl ++ testb $VMX_RUN_VMRESUME, %bl + + /* Load guest registers. Don't clobber flags. */ + mov VCPU_RCX(%_ASM_AX), %_ASM_CX +@@ -92,7 +93,7 @@ SYM_FUNC_START(__vmx_vcpu_run) + mov VCPU_RAX(%_ASM_AX), %_ASM_AX + + /* Check EFLAGS.ZF from 'testb' above */ +- je .Lvmlaunch ++ jz .Lvmlaunch + + /* + * After a successful VMRESUME/VMLAUNCH, control flow "magically" +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index 009bbae9ad66..b255c2a5f680 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -839,6 +839,16 @@ static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) + MSR_IA32_SPEC_CTRL); + } + ++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) ++{ ++ unsigned int flags = 0; ++ ++ if (vmx->loaded_vmcs->launched) ++ flags |= VMX_RUN_VMRESUME; ++ ++ return flags; ++} ++ + static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, + unsigned long entry, unsigned long exit) + { +@@ -6826,7 +6836,8 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) + } + + static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, +- struct vcpu_vmx *vmx) ++ struct vcpu_vmx *vmx, ++ unsigned long flags) + { + guest_state_enter_irqoff(); + +@@ -6845,7 +6856,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, + native_write_cr2(vcpu->arch.cr2); + + vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, +- vmx->loaded_vmcs->launched); ++ flags); + + vcpu->arch.cr2 = native_read_cr2(); + +@@ -6953,7 +6964,7 @@ static 
fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); + + /* The actual VMENTER/EXIT is in the .noinstr.text section. */ +- vmx_vcpu_enter_exit(vcpu, vmx); ++ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); + + /* + * We do not use IBRS in the kernel. If this vCPU has used the +diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h +index 8d2342ede0c5..1b1982448aa4 100644 +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -13,6 +13,7 @@ + #include "vmcs.h" + #include "vmx_ops.h" + #include "cpuid.h" ++#include "run_flags.h" + + #define MSR_TYPE_R 1 + #define MSR_TYPE_W 2 +@@ -404,7 +405,9 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); + struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); + void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); + void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); +-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); ++unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); ++bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, ++ unsigned int flags); + int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr); + void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu); + diff --git a/debian/patches/bugfix/x86/retbleed/0043-KVM-VMX-Prevent-guest-RSB-poisoning-attacks-with-eIB.patch b/debian/patches/bugfix/x86/retbleed/0043-KVM-VMX-Prevent-guest-RSB-poisoning-attacks-with-eIB.patch new file mode 100644 index 000000000..a394dda4d --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0043-KVM-VMX-Prevent-guest-RSB-poisoning-attacks-with-eIB.patch @@ -0,0 +1,246 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Tue, 14 Jun 2022 23:16:13 +0200 +Subject: KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS +Origin: https://git.kernel.org/linus/fc02735b14fff8c6678b521d324ade27b1a3d4cf + +On eIBRS systems, the returns in the vmexit return path from +__vmx_vcpu_run() to 
vmx_vcpu_run() are exposed to RSB poisoning attacks. + +Fix that by moving the post-vmexit spec_ctrl handling to immediately +after the vmexit. + +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/nospec-branch.h | 1 + + arch/x86/kernel/cpu/bugs.c | 4 +++ + arch/x86/kvm/vmx/run_flags.h | 1 + + arch/x86/kvm/vmx/vmenter.S | 49 +++++++++++++++++++++------- + arch/x86/kvm/vmx/vmx.c | 48 +++++++++++++++------------ + arch/x86/kvm/vmx/vmx.h | 1 + + 6 files changed, 73 insertions(+), 31 deletions(-) + +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index dee9ef77af13..ccde87e6eabb 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -274,6 +274,7 @@ static inline void indirect_branch_prediction_barrier(void) + + /* The Intel SPEC CTRL MSR base value cache */ + extern u64 x86_spec_ctrl_base; ++extern u64 x86_spec_ctrl_current; + extern void write_spec_ctrl_current(u64 val, bool force); + extern u64 spec_ctrl_current(void); + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index efff8b9f1bbd..31ccb7852afd 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -195,6 +195,10 @@ void __init check_bugs(void) + #endif + } + ++/* ++ * NOTE: For VMX, this function is not called in the vmexit path. ++ * It uses vmx_spec_ctrl_restore_host() instead. 
++ */ + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) + { +diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h +index 57f4c664ea9c..edc3f16cc189 100644 +--- a/arch/x86/kvm/vmx/run_flags.h ++++ b/arch/x86/kvm/vmx/run_flags.h +@@ -3,5 +3,6 @@ + #define __KVM_X86_VMX_RUN_FLAGS_H + + #define VMX_RUN_VMRESUME (1 << 0) ++#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1) + + #endif /* __KVM_X86_VMX_RUN_FLAGS_H */ +diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S +index ddc3bf85db33..8641ea74a307 100644 +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -33,9 +33,10 @@ + + /** + * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode +- * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp) ++ * @vmx: struct vcpu_vmx * + * @regs: unsigned long * (to guest registers) +- * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH ++ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH ++ * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl + * + * Returns: + * 0 on VM-Exit, 1 on VM-Fail +@@ -54,6 +55,12 @@ SYM_FUNC_START(__vmx_vcpu_run) + #endif + push %_ASM_BX + ++ /* Save @vmx for SPEC_CTRL handling */ ++ push %_ASM_ARG1 ++ ++ /* Save @flags for SPEC_CTRL handling */ ++ push %_ASM_ARG3 ++ + /* + * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and + * @regs is needed after VM-Exit to save the guest's register values. +@@ -149,25 +156,23 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) + mov %r15, VCPU_R15(%_ASM_AX) + #endif + +- /* IMPORTANT: RSB must be stuffed before the first return. */ +- FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE +- +- /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */ +- xor %eax, %eax ++ /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). 
*/ ++ xor %ebx, %ebx + + .Lclear_regs: + /* +- * Clear all general purpose registers except RSP and RAX to prevent ++ * Clear all general purpose registers except RSP and RBX to prevent + * speculative use of the guest's values, even those that are reloaded + * via the stack. In theory, an L1 cache miss when restoring registers + * could lead to speculative execution with the guest's values. + * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially + * free. RSP and RAX are exempt as RSP is restored by hardware during +- * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail. ++ * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return ++ * value. + */ ++ xor %eax, %eax + xor %ecx, %ecx + xor %edx, %edx +- xor %ebx, %ebx + xor %ebp, %ebp + xor %esi, %esi + xor %edi, %edi +@@ -185,6 +190,28 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) + /* "POP" @regs. */ + add $WORD_SIZE, %_ASM_SP + ++ /* ++ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before ++ * the first unbalanced RET after vmexit! ++ * ++ * For retpoline, RSB filling is needed to prevent poisoned RSB entries ++ * and (in some cases) RSB underflow. ++ * ++ * eIBRS has its own protection against poisoned RSB, so it doesn't ++ * need the RSB filling sequence. But it does need to be enabled ++ * before the first unbalanced RET. 
++ */ ++ ++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE ++ ++ pop %_ASM_ARG2 /* @flags */ ++ pop %_ASM_ARG1 /* @vmx */ ++ ++ call vmx_spec_ctrl_restore_host ++ ++ /* Put return value in AX */ ++ mov %_ASM_BX, %_ASM_AX ++ + pop %_ASM_BX + #ifdef CONFIG_X86_64 + pop %r12 +@@ -204,7 +231,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) + ud2 + .Lvmfail: + /* VM-Fail: set return value to 1 */ +- mov $1, %eax ++ mov $1, %_ASM_BX + jmp .Lclear_regs + + SYM_FUNC_END(__vmx_vcpu_run) +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index b255c2a5f680..b81000cc826a 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -846,6 +846,14 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) + if (vmx->loaded_vmcs->launched) + flags |= VMX_RUN_VMRESUME; + ++ /* ++ * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free ++ * to change it directly without causing a vmexit. In that case read ++ * it after vmexit and store it in vmx->spec_ctrl. ++ */ ++ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) ++ flags |= VMX_RUN_SAVE_SPEC_CTRL; ++ + return flags; + } + +@@ -6823,6 +6831,26 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) + } + } + ++void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, ++ unsigned int flags) ++{ ++ u64 hostval = this_cpu_read(x86_spec_ctrl_current); ++ ++ if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL)) ++ return; ++ ++ if (flags & VMX_RUN_SAVE_SPEC_CTRL) ++ vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL); ++ ++ /* ++ * If the guest/host SPEC_CTRL values differ, restore the host value. 
++ */ ++ if (vmx->spec_ctrl != hostval) ++ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); ++ ++ barrier_nospec(); ++} ++ + static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) + { + switch (to_vmx(vcpu)->exit_reason.basic) { +@@ -6966,26 +6994,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) + /* The actual VMENTER/EXIT is in the .noinstr.text section. */ + vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); + +- /* +- * We do not use IBRS in the kernel. If this vCPU has used the +- * SPEC_CTRL MSR it may have left it on; save the value and +- * turn it off. This is much more efficient than blindly adding +- * it to the atomic save/restore list. Especially as the former +- * (Saving guest MSRs on vmexit) doesn't even exist in KVM. +- * +- * For non-nested case: +- * If the L01 MSR bitmap does not intercept the MSR, then we need to +- * save it. +- * +- * For nested case: +- * If the L02 MSR bitmap does not intercept the MSR, then we need to +- * save it. +- */ +- if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) +- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); +- +- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); +- + /* All fields are clean at this point */ + if (static_branch_unlikely(&enable_evmcs)) { + current_evmcs->hv_clean_fields |= +diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h +index 1b1982448aa4..da654af12ccb 100644 +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -405,6 +405,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); + struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr); + void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu); + void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp); ++void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags); + unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx); + bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, + unsigned int flags); diff --git 
a/debian/patches/bugfix/x86/retbleed/0044-KVM-VMX-Fix-IBRS-handling-after-vmexit.patch b/debian/patches/bugfix/x86/retbleed/0044-KVM-VMX-Fix-IBRS-handling-after-vmexit.patch new file mode 100644 index 000000000..0b45e2893 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0044-KVM-VMX-Fix-IBRS-handling-after-vmexit.patch @@ -0,0 +1,34 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Tue, 14 Jun 2022 23:16:14 +0200 +Subject: KVM: VMX: Fix IBRS handling after vmexit +Origin: https://git.kernel.org/linus/bea7e31a5caccb6fe8ed989c065072354f0ecb52 + +For legacy IBRS to work, the IBRS bit needs to be always re-written +after vmexit, even if it's already on. + +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kvm/vmx/vmx.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index b81000cc826a..421db354b1ab 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6844,8 +6844,13 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, + + /* + * If the guest/host SPEC_CTRL values differ, restore the host value. ++ * ++ * For legacy IBRS, the IBRS bit always needs to be written after ++ * transitioning from a less privileged predictor mode, regardless of ++ * whether the guest/host values differ. 
+ */ +- if (vmx->spec_ctrl != hostval) ++ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) || ++ vmx->spec_ctrl != hostval) + native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval); + + barrier_nospec(); diff --git a/debian/patches/bugfix/x86/retbleed/0045-x86-speculation-Fill-RSB-on-vmexit-for-IBRS.patch b/debian/patches/bugfix/x86/retbleed/0045-x86-speculation-Fill-RSB-on-vmexit-for-IBRS.patch new file mode 100644 index 000000000..a5395236a --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0045-x86-speculation-Fill-RSB-on-vmexit-for-IBRS.patch @@ -0,0 +1,133 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Tue, 14 Jun 2022 23:16:15 +0200 +Subject: x86/speculation: Fill RSB on vmexit for IBRS +Origin: https://git.kernel.org/linus/9756bba28470722dacb79ffce554336dd1f6a6cd + +Prevent RSB underflow/poisoning attacks with RSB. While at it, add a +bunch of comments to attempt to document the current state of tribal +knowledge about RSB attacks and what exactly is being mitigated. + +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/cpufeatures.h | 2 +- + arch/x86/kernel/cpu/bugs.c | 63 +++++++++++++++++++++++++++--- + arch/x86/kvm/vmx/vmenter.S | 6 +-- + 3 files changed, 62 insertions(+), 9 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 09dce77f4848..d143f018eda1 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -204,7 +204,7 @@ + #define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ + #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ + #define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ +-/* FREE! 
( 7*32+13) */ ++#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ + #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ + #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ + #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 31ccb7852afd..fcbd072a5e36 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1401,16 +1401,69 @@ static void __init spectre_v2_select_mitigation(void) + pr_info("%s\n", spectre_v2_strings[mode]); + + /* +- * If spectre v2 protection has been enabled, unconditionally fill +- * RSB during a context switch; this protects against two independent +- * issues: ++ * If Spectre v2 protection has been enabled, fill the RSB during a ++ * context switch. In general there are two types of RSB attacks ++ * across context switches, for which the CALLs/RETs may be unbalanced. + * +- * - RSB underflow (and switch to BTB) on Skylake+ +- * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs ++ * 1) RSB underflow ++ * ++ * Some Intel parts have "bottomless RSB". When the RSB is empty, ++ * speculated return targets may come from the branch predictor, ++ * which could have a user-poisoned BTB or BHB entry. ++ * ++ * AMD has it even worse: *all* returns are speculated from the BTB, ++ * regardless of the state of the RSB. ++ * ++ * When IBRS or eIBRS is enabled, the "user -> kernel" attack ++ * scenario is mitigated by the IBRS branch prediction isolation ++ * properties, so the RSB buffer filling wouldn't be necessary to ++ * protect against this type of attack. ++ * ++ * The "user -> user" attack scenario is mitigated by RSB filling. ++ * ++ * 2) Poisoned RSB entry ++ * ++ * If the 'next' in-kernel return stack is shorter than 'prev', ++ * 'next' could be tricked into speculating with a user-poisoned RSB ++ * entry. 
++ * ++ * The "user -> kernel" attack scenario is mitigated by SMEP and ++ * eIBRS. ++ * ++ * The "user -> user" scenario, also known as SpectreBHB, requires ++ * RSB clearing. ++ * ++ * So to mitigate all cases, unconditionally fill RSB on context ++ * switches. ++ * ++ * FIXME: Is this pointless for retbleed-affected AMD? + */ + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); + ++ /* ++ * Similar to context switches, there are two types of RSB attacks ++ * after vmexit: ++ * ++ * 1) RSB underflow ++ * ++ * 2) Poisoned RSB entry ++ * ++ * When retpoline is enabled, both are mitigated by filling/clearing ++ * the RSB. ++ * ++ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch ++ * prediction isolation protections, RSB still needs to be cleared ++ * because of #2. Note that SMEP provides no protection here, unlike ++ * user-space-poisoned RSB entries. ++ * ++ * eIBRS, on the other hand, has RSB-poisoning protections, so it ++ * doesn't need RSB clearing after vmexit. ++ */ ++ if (boot_cpu_has(X86_FEATURE_RETPOLINE) || ++ boot_cpu_has(X86_FEATURE_KERNEL_IBRS)) ++ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT); ++ + /* + * Retpoline protects the kernel, but doesn't protect firmware. IBRS + * and Enhanced IBRS protect firmware too, so enable IBRS around +diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S +index 8641ea74a307..4c743fa98a1f 100644 +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -194,15 +194,15 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL) + * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before + * the first unbalanced RET after vmexit! + * +- * For retpoline, RSB filling is needed to prevent poisoned RSB entries +- * and (in some cases) RSB underflow. ++ * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB ++ * entries and (in some cases) RSB underflow. 
+ * + * eIBRS has its own protection against poisoned RSB, so it doesn't + * need the RSB filling sequence. But it does need to be enabled + * before the first unbalanced RET. + */ + +- FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE ++ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT + + pop %_ASM_ARG2 /* @flags */ + pop %_ASM_ARG1 /* @vmx */ diff --git a/debian/patches/bugfix/x86/retbleed/0046-KVM-VMX-Prevent-RSB-underflow-before-vmenter.patch b/debian/patches/bugfix/x86/retbleed/0046-KVM-VMX-Prevent-RSB-underflow-before-vmenter.patch new file mode 100644 index 000000000..860b76f25 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0046-KVM-VMX-Prevent-RSB-underflow-before-vmenter.patch @@ -0,0 +1,182 @@ +From: Josh Poimboeuf <jpoimboe@kernel.org> +Date: Tue, 14 Jun 2022 23:16:16 +0200 +Subject: KVM: VMX: Prevent RSB underflow before vmenter +Origin: https://git.kernel.org/linus/07853adc29a058c5fd143c14e5ac528448a72ed9 + +On VMX, there are some balanced returns between the time the guest's +SPEC_CTRL value is written, and the vmenter. + +Balanced returns (matched by a preceding call) are usually ok, but it's +at least theoretically possible an NMI with a deep call stack could +empty the RSB before one of the returns. + +For maximum paranoia, don't allow *any* returns (balanced or otherwise) +between the SPEC_CTRL write and the vmenter. + + [ bp: Fix 32-bit build. 
] + +Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kernel/asm-offsets.c | 6 ++++++ + arch/x86/kernel/cpu/bugs.c | 4 ++-- + arch/x86/kvm/vmx/capabilities.h | 4 ++-- + arch/x86/kvm/vmx/vmenter.S | 29 +++++++++++++++++++++++++++++ + arch/x86/kvm/vmx/vmx.c | 8 -------- + arch/x86/kvm/vmx/vmx.h | 4 ++-- + arch/x86/kvm/vmx/vmx_ops.h | 2 +- + 7 files changed, 42 insertions(+), 15 deletions(-) + +diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c +index 437308004ef2..cb50589a7102 100644 +--- a/arch/x86/kernel/asm-offsets.c ++++ b/arch/x86/kernel/asm-offsets.c +@@ -19,6 +19,7 @@ + #include <asm/suspend.h> + #include <asm/tlbflush.h> + #include <asm/tdx.h> ++#include "../kvm/vmx/vmx.h" + + #ifdef CONFIG_XEN + #include <xen/interface/xen.h> +@@ -107,4 +108,9 @@ static void __used common(void) + OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); + OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); + OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); ++ ++ if (IS_ENABLED(CONFIG_KVM_INTEL)) { ++ BLANK(); ++ OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl); ++ } + } +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index fcbd072a5e36..182f8b2e8a3c 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -196,8 +196,8 @@ void __init check_bugs(void) + } + + /* +- * NOTE: For VMX, this function is not called in the vmexit path. +- * It uses vmx_spec_ctrl_restore_host() instead. ++ * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is ++ * done in vmenter.S. 
+ */ + void + x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) +diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h +index 3f430e218375..c0e24826a86f 100644 +--- a/arch/x86/kvm/vmx/capabilities.h ++++ b/arch/x86/kvm/vmx/capabilities.h +@@ -4,8 +4,8 @@ + + #include <asm/vmx.h> + +-#include "lapic.h" +-#include "x86.h" ++#include "../lapic.h" ++#include "../x86.h" + + extern bool __read_mostly enable_vpid; + extern bool __read_mostly flexpriority_enabled; +diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S +index 4c743fa98a1f..4182c7ffc909 100644 +--- a/arch/x86/kvm/vmx/vmenter.S ++++ b/arch/x86/kvm/vmx/vmenter.S +@@ -1,9 +1,11 @@ + /* SPDX-License-Identifier: GPL-2.0 */ + #include <linux/linkage.h> + #include <asm/asm.h> ++#include <asm/asm-offsets.h> + #include <asm/bitsperlong.h> + #include <asm/kvm_vcpu_regs.h> + #include <asm/nospec-branch.h> ++#include <asm/percpu.h> + #include <asm/segment.h> + #include "run_flags.h" + +@@ -73,6 +75,33 @@ SYM_FUNC_START(__vmx_vcpu_run) + lea (%_ASM_SP), %_ASM_ARG2 + call vmx_update_host_rsp + ++ ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL ++ ++ /* ++ * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the ++ * host's, write the MSR. ++ * ++ * IMPORTANT: To avoid RSB underflow attacks and any other nastiness, ++ * there must not be any returns or indirect branches between this code ++ * and vmentry. ++ */ ++ mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI ++ movl VMX_spec_ctrl(%_ASM_DI), %edi ++ movl PER_CPU_VAR(x86_spec_ctrl_current), %esi ++ cmp %edi, %esi ++ je .Lspec_ctrl_done ++ mov $MSR_IA32_SPEC_CTRL, %ecx ++ xor %edx, %edx ++ mov %edi, %eax ++ wrmsr ++ ++.Lspec_ctrl_done: ++ ++ /* ++ * Since vmentry is serializing on affected CPUs, there's no need for ++ * an LFENCE to stop speculation from skipping the wrmsr. ++ */ ++ + /* Load @regs to RAX. 
*/ + mov (%_ASM_SP), %_ASM_AX + +diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c +index 421db354b1ab..be7c19374fdd 100644 +--- a/arch/x86/kvm/vmx/vmx.c ++++ b/arch/x86/kvm/vmx/vmx.c +@@ -6988,14 +6988,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) + + kvm_wait_lapic_expire(vcpu); + +- /* +- * If this vCPU has touched SPEC_CTRL, restore the guest's value if +- * it's non-zero. Since vmentry is serialising on affected CPUs, there +- * is no need to worry about the conditional branch over the wrmsr +- * being speculatively taken. +- */ +- x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); +- + /* The actual VMENTER/EXIT is in the .noinstr.text section. */ + vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx)); + +diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h +index da654af12ccb..1e7f9453894b 100644 +--- a/arch/x86/kvm/vmx/vmx.h ++++ b/arch/x86/kvm/vmx/vmx.h +@@ -8,11 +8,11 @@ + #include <asm/intel_pt.h> + + #include "capabilities.h" +-#include "kvm_cache_regs.h" ++#include "../kvm_cache_regs.h" + #include "posted_intr.h" + #include "vmcs.h" + #include "vmx_ops.h" +-#include "cpuid.h" ++#include "../cpuid.h" + #include "run_flags.h" + + #define MSR_TYPE_R 1 +diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h +index 5e7f41225780..5cfc49ddb1b4 100644 +--- a/arch/x86/kvm/vmx/vmx_ops.h ++++ b/arch/x86/kvm/vmx/vmx_ops.h +@@ -8,7 +8,7 @@ + + #include "evmcs.h" + #include "vmcs.h" +-#include "x86.h" ++#include "../x86.h" + + asmlinkage void vmread_error(unsigned long field, bool fault); + __attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, diff --git a/debian/patches/bugfix/x86/retbleed/0047-x86-common-Stamp-out-the-stepping-madness.patch b/debian/patches/bugfix/x86/retbleed/0047-x86-common-Stamp-out-the-stepping-madness.patch new file mode 100644 index 000000000..b31d1eac4 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0047-x86-common-Stamp-out-the-stepping-madness.patch @@ -0,0 +1,73 
@@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Fri, 24 Jun 2022 14:03:25 +0200 +Subject: x86/common: Stamp out the stepping madness +Origin: https://git.kernel.org/linus/7a05bc95ed1c5a59e47aaade9fb4083c27de9e62 + +The whole MMIO/RETBLEED enumeration went overboard on steppings. Get +rid of all that and simply use ANY. + +If a future stepping of these models would not be affected, it had +better set the relevant ARCH_CAP_$FOO_NO bit in +IA32_ARCH_CAPABILITIES. + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +Acked-by: Dave Hansen <dave.hansen@linux.intel.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kernel/cpu/common.c | 37 ++++++++++++++++-------------------- + 1 file changed, 16 insertions(+), 21 deletions(-) + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 75a5c720c852..42d3c7e5f4d7 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1232,32 +1232,27 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { + VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(HASWELL_X, BIT(2) | BIT(4), MMIO), +- VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x5), MMIO), ++ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO), ++ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), + VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, BIT(3) | BIT(4) | BIT(6) | +- BIT(7) | BIT(0xB), MMIO | RETBLEED), +- 
VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPINGS(0x3, 0x3), SRBDS | MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x9, 0xC), SRBDS | MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x9, 0xD), SRBDS | MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0x8), SRBDS), +- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPINGS(0x5, 0x5), MMIO | MMIO_SBDS | RETBLEED), +- VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPINGS(0x1, 0x1), MMIO), +- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPINGS(0x4, 0x6), MMIO), +- VULNBL_INTEL_STEPPINGS(COMETLAKE, BIT(2) | BIT(3) | BIT(5), MMIO | MMIO_SBDS | RETBLEED), +- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), ++ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS | RETBLEED), +- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPINGS(0x1, 0x1), MMIO | RETBLEED), +- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPINGS(0x1, 0x1), MMIO | MMIO_SBDS), ++ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ 
VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), +- VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPINGS(0x0, 0x0), MMIO | MMIO_SBDS), ++ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), + + VULNBL_AMD(0x15, RETBLEED), + VULNBL_AMD(0x16, RETBLEED), diff --git a/debian/patches/bugfix/x86/retbleed/0048-x86-cpu-amd-Enumerate-BTC_NO.patch b/debian/patches/bugfix/x86/retbleed/0048-x86-cpu-amd-Enumerate-BTC_NO.patch new file mode 100644 index 000000000..7cbd24b23 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0048-x86-cpu-amd-Enumerate-BTC_NO.patch @@ -0,0 +1,83 @@ +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Fri, 24 Jun 2022 14:41:21 +0100 +Subject: x86/cpu/amd: Enumerate BTC_NO +Origin: https://git.kernel.org/linus/26aae8ccbc1972233afd08fb3f368947c0314265 + +BTC_NO indicates that hardware is not susceptible to Branch Type Confusion. + +Zen3 CPUs don't suffer BTC. + +Hypervisors are expected to synthesise BTC_NO when it is appropriate +given the migration pool, to prevent kernels using heuristics. + + [ bp: Massage. 
] + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/kernel/cpu/amd.c | 21 +++++++++++++++------ + arch/x86/kernel/cpu/common.c | 6 ++++-- + 3 files changed, 20 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index d143f018eda1..7e8099fd5ec1 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -322,6 +322,7 @@ + #define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ + #define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ + #define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ ++#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ + #define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ + + /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 8cf0659c0521..9cfd11f7ba11 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -890,12 +890,21 @@ static void init_amd_zn(struct cpuinfo_x86 *c) + node_reclaim_distance = 32; + #endif + +- /* +- * Fix erratum 1076: CPB feature bit not being set in CPUID. +- * Always set it, except when running under a hypervisor. +- */ +- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB)) +- set_cpu_cap(c, X86_FEATURE_CPB); ++ /* Fix up CPUID bits, but only if not virtualised. */ ++ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) { ++ ++ /* Erratum 1076: CPB feature bit not being set in CPUID. */ ++ if (!cpu_has(c, X86_FEATURE_CPB)) ++ set_cpu_cap(c, X86_FEATURE_CPB); ++ ++ /* ++ * Zen3 (Fam19 model < 0x10) parts are not susceptible to ++ * Branch Type Confusion, but predate the allocation of the ++ * BTC_NO bit. 
++ */ ++ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO)) ++ set_cpu_cap(c, X86_FEATURE_BTC_NO); ++ } + } + + static void init_amd(struct cpuinfo_x86 *c) +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 42d3c7e5f4d7..02ba27355b04 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1359,8 +1359,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) + !arch_cap_mmio_immune(ia32_cap)) + setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); + +- if ((cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))) +- setup_force_cpu_bug(X86_BUG_RETBLEED); ++ if (!cpu_has(c, X86_FEATURE_BTC_NO)) { ++ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) ++ setup_force_cpu_bug(X86_BUG_RETBLEED); ++ } + + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) + return; diff --git a/debian/patches/bugfix/x86/retbleed/0049-x86-retbleed-Add-fine-grained-Kconfig-knobs.patch b/debian/patches/bugfix/x86/retbleed/0049-x86-retbleed-Add-fine-grained-Kconfig-knobs.patch new file mode 100644 index 000000000..c3fd951d3 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0049-x86-retbleed-Add-fine-grained-Kconfig-knobs.patch @@ -0,0 +1,632 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Mon, 27 Jun 2022 22:21:17 +0000 +Subject: x86/retbleed: Add fine grained Kconfig knobs +Origin: https://git.kernel.org/linus/f43b9876e857c739d407bc56df288b0ebe1a9164 + +Do fine-grained Kconfig for all the various retbleed parts. + +NOTE: if your compiler doesn't support return thunks this will +silently 'upgrade' your mitigation to IBPB, you might not like this. 
+ +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/Kconfig | 111 +++++++++++++++++------ + arch/x86/Makefile | 8 +- + arch/x86/entry/calling.h | 4 + + arch/x86/include/asm/disabled-features.h | 18 +++- + arch/x86/include/asm/linkage.h | 4 +- + arch/x86/include/asm/nospec-branch.h | 10 +- + arch/x86/include/asm/static_call.h | 2 +- + arch/x86/kernel/alternative.c | 5 + + arch/x86/kernel/cpu/amd.c | 2 + + arch/x86/kernel/cpu/bugs.c | 42 ++++++--- + arch/x86/kernel/static_call.c | 2 +- + arch/x86/kvm/emulate.c | 4 +- + arch/x86/lib/retpoline.S | 4 + + scripts/Makefile.lib | 1 + + scripts/Makefile.vmlinux_o | 2 +- + security/Kconfig | 11 --- + tools/objtool/builtin-check.c | 7 ++ + tools/objtool/check.c | 9 +- + tools/objtool/include/objtool/builtin.h | 1 + + 19 files changed, 178 insertions(+), 69 deletions(-) + +diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig +index e35eecfb74f2..e58798f636d4 100644 +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -462,32 +462,6 @@ config GOLDFISH + def_bool y + depends on X86_GOLDFISH + +-config RETPOLINE +- bool "Avoid speculative indirect branches in kernel" +- select OBJTOOL if HAVE_OBJTOOL +- default y +- help +- Compile kernel with the retpoline compiler options to guard against +- kernel-to-user data leaks by avoiding speculative indirect +- branches. Requires a compiler with -mindirect-branch=thunk-extern +- support for full protection. The kernel may run slower. +- +-config CC_HAS_SLS +- def_bool $(cc-option,-mharden-sls=all) +- +-config CC_HAS_RETURN_THUNK +- def_bool $(cc-option,-mfunction-return=thunk-extern) +- +-config SLS +- bool "Mitigate Straight-Line-Speculation" +- depends on CC_HAS_SLS && X86_64 +- select OBJTOOL if HAVE_OBJTOOL +- default n +- help +- Compile the kernel with straight-line-speculation options to guard +- against straight line speculation. The kernel image might be slightly +- larger. 
+- + config X86_CPU_RESCTRL + bool "x86 CPU resource control support" + depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD) +@@ -2456,6 +2430,91 @@ source "kernel/livepatch/Kconfig" + + endmenu + ++config CC_HAS_SLS ++ def_bool $(cc-option,-mharden-sls=all) ++ ++config CC_HAS_RETURN_THUNK ++ def_bool $(cc-option,-mfunction-return=thunk-extern) ++ ++menuconfig SPECULATION_MITIGATIONS ++ bool "Mitigations for speculative execution vulnerabilities" ++ default y ++ help ++ Say Y here to enable options which enable mitigations for ++ speculative execution hardware vulnerabilities. ++ ++ If you say N, all mitigations will be disabled. You really ++ should know what you are doing to say so. ++ ++if SPECULATION_MITIGATIONS ++ ++config PAGE_TABLE_ISOLATION ++ bool "Remove the kernel mapping in user mode" ++ default y ++ depends on (X86_64 || X86_PAE) ++ help ++ This feature reduces the number of hardware side channels by ++ ensuring that the majority of kernel addresses are not mapped ++ into userspace. ++ ++ See Documentation/x86/pti.rst for more details. ++ ++config RETPOLINE ++ bool "Avoid speculative indirect branches in kernel" ++ select OBJTOOL if HAVE_OBJTOOL ++ default y ++ help ++ Compile kernel with the retpoline compiler options to guard against ++ kernel-to-user data leaks by avoiding speculative indirect ++ branches. Requires a compiler with -mindirect-branch=thunk-extern ++ support for full protection. The kernel may run slower. ++ ++config RETHUNK ++ bool "Enable return-thunks" ++ depends on RETPOLINE && CC_HAS_RETURN_THUNK ++ select OBJTOOL if HAVE_OBJTOOL ++ default y ++ help ++ Compile the kernel with the return-thunks compiler option to guard ++ against kernel-to-user data leaks by avoiding return speculation. ++ Requires a compiler with -mfunction-return=thunk-extern ++ support for full protection. The kernel may run slower. 
++ ++config CPU_UNRET_ENTRY ++ bool "Enable UNRET on kernel entry" ++ depends on CPU_SUP_AMD && RETHUNK ++ default y ++ help ++ Compile the kernel with support for the retbleed=unret mitigation. ++ ++config CPU_IBPB_ENTRY ++ bool "Enable IBPB on kernel entry" ++ depends on CPU_SUP_AMD ++ default y ++ help ++ Compile the kernel with support for the retbleed=ibpb mitigation. ++ ++config CPU_IBRS_ENTRY ++ bool "Enable IBRS on kernel entry" ++ depends on CPU_SUP_INTEL ++ default y ++ help ++ Compile the kernel with support for the spectre_v2=ibrs mitigation. ++ This mitigates both spectre_v2 and retbleed at great cost to ++ performance. ++ ++config SLS ++ bool "Mitigate Straight-Line-Speculation" ++ depends on CC_HAS_SLS && X86_64 ++ select OBJTOOL if HAVE_OBJTOOL ++ default n ++ help ++ Compile the kernel with straight-line-speculation options to guard ++ against straight line speculation. The kernel image might be slightly ++ larger. ++ ++endif ++ + config ARCH_HAS_ADD_PAGES + def_bool y + depends on ARCH_ENABLE_MEMORY_HOTPLUG +diff --git a/arch/x86/Makefile b/arch/x86/Makefile +index 6e16057737e5..1f40dad30d50 100644 +--- a/arch/x86/Makefile ++++ b/arch/x86/Makefile +@@ -15,14 +15,18 @@ endif + ifdef CONFIG_CC_IS_GCC + RETPOLINE_CFLAGS := $(call cc-option,-mindirect-branch=thunk-extern -mindirect-branch-register) + RETPOLINE_CFLAGS += $(call cc-option,-mindirect-branch-cs-prefix) +-RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) + RETPOLINE_VDSO_CFLAGS := $(call cc-option,-mindirect-branch=thunk-inline -mindirect-branch-register) + endif + ifdef CONFIG_CC_IS_CLANG + RETPOLINE_CFLAGS := -mretpoline-external-thunk + RETPOLINE_VDSO_CFLAGS := -mretpoline +-RETPOLINE_CFLAGS += $(call cc-option,-mfunction-return=thunk-extern) + endif ++ ++ifdef CONFIG_RETHUNK ++RETHUNK_CFLAGS := -mfunction-return=thunk-extern ++RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS) ++endif ++ + export RETPOLINE_CFLAGS + export RETPOLINE_VDSO_CFLAGS + +diff --git 
a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h +index 074d04e434de..f6907627172b 100644 +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -297,6 +297,7 @@ For 32-bit we have the following conventions - kernel is built with + * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set. + */ + .macro IBRS_ENTER save_reg ++#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +@@ -317,6 +318,7 @@ For 32-bit we have the following conventions - kernel is built with + shr $32, %rdx + wrmsr + .Lend_\@: ++#endif + .endm + + /* +@@ -324,6 +326,7 @@ For 32-bit we have the following conventions - kernel is built with + * regs. Must be called after the last RET. + */ + .macro IBRS_EXIT save_reg ++#ifdef CONFIG_CPU_IBRS_ENTRY + ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS + movl $MSR_IA32_SPEC_CTRL, %ecx + +@@ -338,6 +341,7 @@ For 32-bit we have the following conventions - kernel is built with + shr $32, %rdx + wrmsr + .Lend_\@: ++#endif + .endm + + /* +diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h +index db75da511a36..33d2cd04d254 100644 +--- a/arch/x86/include/asm/disabled-features.h ++++ b/arch/x86/include/asm/disabled-features.h +@@ -54,9 +54,19 @@ + # define DISABLE_RETPOLINE 0 + #else + # define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \ +- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)) | \ +- (1 << (X86_FEATURE_RETHUNK & 31)) | \ +- (1 << (X86_FEATURE_UNRET & 31))) ++ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31))) ++#endif ++ ++#ifdef CONFIG_RETHUNK ++# define DISABLE_RETHUNK 0 ++#else ++# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31)) ++#endif ++ ++#ifdef CONFIG_CPU_UNRET_ENTRY ++# define DISABLE_UNRET 0 ++#else ++# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) + #endif + + #ifdef CONFIG_INTEL_IOMMU_SVM +@@ -91,7 +101,7 @@ + #define DISABLED_MASK8 (DISABLE_TDX_GUEST) + #define 
DISABLED_MASK9 (DISABLE_SGX) + #define DISABLED_MASK10 0 +-#define DISABLED_MASK11 (DISABLE_RETPOLINE) ++#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) + #define DISABLED_MASK12 0 + #define DISABLED_MASK13 0 + #define DISABLED_MASK14 0 +diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h +index e3ae331cabb1..73ca20049835 100644 +--- a/arch/x86/include/asm/linkage.h ++++ b/arch/x86/include/asm/linkage.h +@@ -19,7 +19,7 @@ + #define __ALIGN_STR __stringify(__ALIGN) + #endif + +-#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) + #define RET jmp __x86_return_thunk + #else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS +@@ -31,7 +31,7 @@ + + #else /* __ASSEMBLY__ */ + +-#if defined(CONFIG_RETPOLINE) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) ++#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) + #define ASM_RET "jmp __x86_return_thunk\n\t" + #else /* CONFIG_RETPOLINE */ + #ifdef CONFIG_SLS +diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h +index ccde87e6eabb..bb05ed4f46bd 100644 +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -127,6 +127,12 @@ + .Lskip_rsb_\@: + .endm + ++#ifdef CONFIG_CPU_UNRET_ENTRY ++#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" ++#else ++#define CALL_ZEN_UNTRAIN_RET "" ++#endif ++ + /* + * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the + * return thunk isn't mapped into the userspace tables (then again, AMD +@@ -139,10 +145,10 @@ + * where we have a stack but before any RET instruction. 
+ */ + .macro UNTRAIN_RET +-#ifdef CONFIG_RETPOLINE ++#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) + ANNOTATE_UNRET_END + ALTERNATIVE_2 "", \ +- "call zen_untrain_ret", X86_FEATURE_UNRET, \ ++ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + #endif + .endm +diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h +index 70cc9ccb8029..343b722ccaf2 100644 +--- a/arch/x86/include/asm/static_call.h ++++ b/arch/x86/include/asm/static_call.h +@@ -46,7 +46,7 @@ + #define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)") + +-#ifdef CONFIG_RETPOLINE ++#ifdef CONFIG_RETHUNK + #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ + __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk") + #else +diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c +index cf447ee18b3c..d6858533e6e5 100644 +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -508,6 +508,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) + } + } + ++#ifdef CONFIG_RETHUNK + /* + * Rewrite the compiler generated return thunk tail-calls. 
+ * +@@ -569,6 +570,10 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) + } + } + } ++#else ++void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } ++#endif /* CONFIG_RETHUNK */ ++ + #else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */ + + void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { } +diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c +index 9cfd11f7ba11..35d5288394cb 100644 +--- a/arch/x86/kernel/cpu/amd.c ++++ b/arch/x86/kernel/cpu/amd.c +@@ -864,6 +864,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) + + void init_spectral_chicken(struct cpuinfo_x86 *c) + { ++#ifdef CONFIG_CPU_UNRET_ENTRY + u64 value; + + /* +@@ -880,6 +881,7 @@ void init_spectral_chicken(struct cpuinfo_x86 *c) + wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value); + } + } ++#endif + } + + static void init_amd_zn(struct cpuinfo_x86 *c) +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index 182f8b2e8a3c..cf08a1b8f3c7 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -842,7 +842,6 @@ static int __init retbleed_parse_cmdline(char *str) + early_param("retbleed", retbleed_parse_cmdline); + + #define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n" +-#define RETBLEED_COMPILER_MSG "WARNING: kernel not compiled with RETPOLINE or -mfunction-return capable compiler; falling back to IBPB!\n" + #define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n" + + static void __init retbleed_select_mitigation(void) +@@ -857,18 +856,33 @@ static void __init retbleed_select_mitigation(void) + return; + + case RETBLEED_CMD_UNRET: +- retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) { ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ } else { ++ pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n"); ++ goto do_cmd_auto; 
++ } + break; + + case RETBLEED_CMD_IBPB: +- retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ } else { ++ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); ++ goto do_cmd_auto; ++ } + break; + ++do_cmd_auto: + case RETBLEED_CMD_AUTO: + default: + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || +- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) +- retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { ++ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) ++ retbleed_mitigation = RETBLEED_MITIGATION_UNRET; ++ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) ++ retbleed_mitigation = RETBLEED_MITIGATION_IBPB; ++ } + + /* + * The Intel mitigation (IBRS or eIBRS) was already selected in +@@ -881,14 +895,6 @@ static void __init retbleed_select_mitigation(void) + + switch (retbleed_mitigation) { + case RETBLEED_MITIGATION_UNRET: +- +- if (!IS_ENABLED(CONFIG_RETPOLINE) || +- !IS_ENABLED(CONFIG_CC_HAS_RETURN_THUNK)) { +- pr_err(RETBLEED_COMPILER_MSG); +- retbleed_mitigation = RETBLEED_MITIGATION_IBPB; +- goto retbleed_force_ibpb; +- } +- + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + +@@ -900,7 +906,6 @@ static void __init retbleed_select_mitigation(void) + break; + + case RETBLEED_MITIGATION_IBPB: +-retbleed_force_ibpb: + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + mitigate_smt = true; + break; +@@ -1271,6 +1276,12 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) + return SPECTRE_V2_CMD_AUTO; + } + ++ if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) { ++ pr_err("%s selected but not compiled in. Switching to AUTO select\n", ++ mitigation_options[i].option); ++ return SPECTRE_V2_CMD_AUTO; ++ } ++ + if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { + pr_err("%s selected but not Intel CPU. 
Switching to AUTO select\n", + mitigation_options[i].option); +@@ -1328,7 +1339,8 @@ static void __init spectre_v2_select_mitigation(void) + break; + } + +- if (boot_cpu_has_bug(X86_BUG_RETBLEED) && ++ if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && ++ boot_cpu_has_bug(X86_BUG_RETBLEED) && + retbleed_cmd != RETBLEED_CMD_OFF && + boot_cpu_has(X86_FEATURE_IBRS) && + boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { +diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c +index fe21fe778185..be7038a0da4d 100644 +--- a/arch/x86/kernel/static_call.c ++++ b/arch/x86/kernel/static_call.c +@@ -126,7 +126,7 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) + } + EXPORT_SYMBOL_GPL(arch_static_call_transform); + +-#ifdef CONFIG_RETPOLINE ++#ifdef CONFIG_RETHUNK + /* + * This is called by apply_returns() to fix up static call trampolines, + * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index b01437015f99..db96bf7d1122 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -439,10 +439,10 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); + * + * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT] + * SETcc %al [3 bytes] +- * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETPOLINE] ++ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK] + * INT3 [1 byte; CONFIG_SLS] + */ +-#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETPOLINE)) + \ ++#define RET_LENGTH (1 + (4 * IS_ENABLED(CONFIG_RETHUNK)) + \ + IS_ENABLED(CONFIG_SLS)) + #define SETCC_LENGTH (ENDBR_INSN_SIZE + 3 + RET_LENGTH) + #define SETCC_ALIGN (4 << ((SETCC_LENGTH > 4) & 1) << ((SETCC_LENGTH > 8) & 1)) +diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S +index fdd16163b996..073289a55f84 100644 +--- a/arch/x86/lib/retpoline.S ++++ b/arch/x86/lib/retpoline.S +@@ -72,6 +72,8 @@ SYM_CODE_END(__x86_indirect_thunk_array) + * This function name is magical 
and is used by -mfunction-return=thunk-extern + * for the compiler to generate JMPs to it. + */ ++#ifdef CONFIG_RETHUNK ++ + .section .text.__x86.return_thunk + + /* +@@ -136,3 +138,5 @@ SYM_FUNC_END(zen_untrain_ret) + __EXPORT_THUNK(zen_untrain_ret) + + EXPORT_SYMBOL(__x86_return_thunk) ++ ++#endif /* CONFIG_RETHUNK */ +diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib +index d1425778664b..3fb6a99e78c4 100644 +--- a/scripts/Makefile.lib ++++ b/scripts/Makefile.lib +@@ -236,6 +236,7 @@ objtool_args = \ + $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \ + $(if $(CONFIG_UNWINDER_ORC), --orc) \ + $(if $(CONFIG_RETPOLINE), --retpoline) \ ++ $(if $(CONFIG_RETHUNK), --rethunk) \ + $(if $(CONFIG_SLS), --sls) \ + $(if $(CONFIG_STACK_VALIDATION), --stackval) \ + $(if $(CONFIG_HAVE_STATIC_CALL_INLINE), --static-call) \ +diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o +index bc67748044a6..84019814f33f 100644 +--- a/scripts/Makefile.vmlinux_o ++++ b/scripts/Makefile.vmlinux_o +@@ -44,7 +44,7 @@ objtool-enabled := $(or $(delay-objtool),$(CONFIG_NOINSTR_VALIDATION)) + + objtool_args := \ + $(if $(delay-objtool),$(objtool_args)) \ +- $(if $(CONFIG_NOINSTR_VALIDATION), --noinstr $(if $(CONFIG_RETPOLINE), --unret)) \ ++ $(if $(CONFIG_NOINSTR_VALIDATION), --noinstr $(if $(CONFIG_CPU_UNRET_ENTRY), --unret)) \ + $(if $(CONFIG_GCOV_KERNEL), --no-unreachable) \ + --link + +diff --git a/security/Kconfig b/security/Kconfig +index f29e4c656983..e6db09a779b7 100644 +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -54,17 +54,6 @@ config SECURITY_NETWORK + implement socket and networking access controls. + If you are unsure how to answer this question, answer N. + +-config PAGE_TABLE_ISOLATION +- bool "Remove the kernel mapping in user mode" +- default y +- depends on (X86_64 || X86_PAE) && !UML +- help +- This feature reduces the number of hardware side channels by +- ensuring that the majority of kernel addresses are not mapped +- into userspace. 
+- +- See Documentation/x86/pti.rst for more details. +- + config SECURITY_INFINIBAND + bool "Infiniband Security Hooks" + depends on SECURITY && INFINIBAND +diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c +index c063e1ff96b2..24fbe803a0d3 100644 +--- a/tools/objtool/builtin-check.c ++++ b/tools/objtool/builtin-check.c +@@ -68,6 +68,7 @@ const struct option check_options[] = { + OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"), + OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"), + OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"), ++ OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"), + OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"), + OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"), + OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"), +@@ -124,6 +125,7 @@ static bool opts_valid(void) + opts.noinstr || + opts.orc || + opts.retpoline || ++ opts.rethunk || + opts.sls || + opts.stackval || + opts.static_call || +@@ -136,6 +138,11 @@ static bool opts_valid(void) + return true; + } + ++ if (opts.unret && !opts.rethunk) { ++ ERROR("--unret requires --rethunk"); ++ return false; ++ } ++ + if (opts.dump_orc) + return true; + +diff --git a/tools/objtool/check.c b/tools/objtool/check.c +index ddfdd138cc2a..7bebdb8867cd 100644 +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -3732,8 +3732,11 @@ static int validate_retpoline(struct objtool_file *file) + continue; + + if (insn->type == INSN_RETURN) { +- WARN_FUNC("'naked' return found in RETPOLINE build", +- insn->sec, insn->offset); ++ if (opts.rethunk) { ++ WARN_FUNC("'naked' return found in RETHUNK build", ++ insn->sec, insn->offset); ++ } else ++ continue; + } else { + WARN_FUNC("indirect %s found in RETPOLINE build", + insn->sec, insn->offset, +@@ -4264,7 +4267,9 @@ int check(struct objtool_file 
*file) + if (ret < 0) + goto out; + warnings += ret; ++ } + ++ if (opts.rethunk) { + ret = create_return_sites_sections(file); + if (ret < 0) + goto out; +diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h +index 0c476b0b40a3..42a52f1a0add 100644 +--- a/tools/objtool/include/objtool/builtin.h ++++ b/tools/objtool/include/objtool/builtin.h +@@ -19,6 +19,7 @@ struct opts { + bool noinstr; + bool orc; + bool retpoline; ++ bool rethunk; + bool unret; + bool sls; + bool stackval; diff --git a/debian/patches/bugfix/x86/retbleed/0050-x86-bugs-Add-Cannon-lake-to-RETBleed-affected-CPU-li.patch b/debian/patches/bugfix/x86/retbleed/0050-x86-bugs-Add-Cannon-lake-to-RETBleed-affected-CPU-li.patch new file mode 100644 index 000000000..f94c89e3a --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0050-x86-bugs-Add-Cannon-lake-to-RETBleed-affected-CPU-li.patch @@ -0,0 +1,26 @@ +From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> +Date: Wed, 6 Jul 2022 15:01:15 -0700 +Subject: x86/bugs: Add Cannon lake to RETBleed affected CPU list +Origin: https://git.kernel.org/linus/f54d45372c6ac9c993451de5e51312485f7d10bc + +Cannon lake is also affected by RETBleed, add it to the list. 
+ +Fixes: 6ad0ad2bf8a6 ("x86/bugs: Report Intel retbleed vulnerability") +Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kernel/cpu/common.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c +index 02ba27355b04..736262a76a12 100644 +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1242,6 +1242,7 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), ++ VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), diff --git a/debian/patches/bugfix/x86/retbleed/0051-x86-entry-Move-PUSH_AND_CLEAR_REGS-back-into-error_e.patch b/debian/patches/bugfix/x86/retbleed/0051-x86-entry-Move-PUSH_AND_CLEAR_REGS-back-into-error_e.patch new file mode 100644 index 000000000..b94645ffc --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0051-x86-entry-Move-PUSH_AND_CLEAR_REGS-back-into-error_e.patch @@ -0,0 +1,67 @@ +From: Peter Zijlstra <peterz@infradead.org> +Date: Wed, 6 Jul 2022 15:33:30 +0200 +Subject: x86/entry: Move PUSH_AND_CLEAR_REGS() back into error_entry +Origin: https://git.kernel.org/linus/2c08b9b38f5b0f4a6c2d29be22b695e4ec4a556b + +Commit + + ee774dac0da1 ("x86/entry: Move PUSH_AND_CLEAR_REGS out of error_entry()") + +moved PUSH_AND_CLEAR_REGS out of error_entry, into its own function, in +part to avoid calling error_entry() for XenPV. 
+ +However, commit + + 7c81c0c9210c ("x86/entry: Avoid very early RET") + +had to change that because the 'ret' was too early and moved it into +idtentry, bloating the text size, since idtentry is expanded for every +exception vector. + +However, with the advent of xen_error_entry() in commit + + d147553b64bad ("x86/xen: Add UNTRAIN_RET") + +it became possible to remove PUSH_AND_CLEAR_REGS from idtentry, back +into *error_entry(). + +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/entry/entry_64.S | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S +index a4ba162e52c3..285e043a3e40 100644 +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -328,6 +328,8 @@ SYM_CODE_END(ret_from_fork) + + SYM_CODE_START_LOCAL(xen_error_entry) + UNWIND_HINT_FUNC ++ PUSH_AND_CLEAR_REGS save_ret=1 ++ ENCODE_FRAME_POINTER 8 + UNTRAIN_RET + RET + SYM_CODE_END(xen_error_entry) +@@ -339,9 +341,6 @@ SYM_CODE_END(xen_error_entry) + */ + .macro idtentry_body cfunc has_error_code:req + +- PUSH_AND_CLEAR_REGS +- ENCODE_FRAME_POINTER +- + /* + * Call error_entry() and switch to the task stack if from userspace. 
+ * +@@ -1042,6 +1041,10 @@ SYM_CODE_END(paranoid_exit) + */ + SYM_CODE_START_LOCAL(error_entry) + UNWIND_HINT_FUNC ++ ++ PUSH_AND_CLEAR_REGS save_ret=1 ++ ENCODE_FRAME_POINTER 8 ++ + testb $3, CS+8(%rsp) + jz .Lerror_kernelspace + diff --git a/debian/patches/bugfix/x86/retbleed/0052-x86-bugs-Do-not-enable-IBPB-on-entry-when-IBPB-is-no.patch b/debian/patches/bugfix/x86/retbleed/0052-x86-bugs-Do-not-enable-IBPB-on-entry-when-IBPB-is-no.patch new file mode 100644 index 000000000..b21ca419c --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0052-x86-bugs-Do-not-enable-IBPB-on-entry-when-IBPB-is-no.patch @@ -0,0 +1,44 @@ +From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> +Date: Thu, 7 Jul 2022 13:41:52 -0300 +Subject: x86/bugs: Do not enable IBPB-on-entry when IBPB is not supported +Origin: https://git.kernel.org/linus/2259da159fbe5dba8ac00b560cf00b6a6537fa18 + +There are some VM configurations which have Skylake model but do not +support IBPB. In those cases, when using retbleed=ibpb, userspace is going +to be killed and kernel is going to panic. + +If the CPU does not support IBPB, warn and proceed with the auto option. Also, +do not fallback to IBPB on AMD/Hygon systems if it is not supported. 
+ +Fixes: 3ebc17006888 ("x86/bugs: Add retbleed=ibpb") +Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kernel/cpu/bugs.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index cf08a1b8f3c7..d26c57d98b98 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -865,7 +865,10 @@ static void __init retbleed_select_mitigation(void) + break; + + case RETBLEED_CMD_IBPB: +- if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { ++ if (!boot_cpu_has(X86_FEATURE_IBPB)) { ++ pr_err("WARNING: CPU does not support IBPB.\n"); ++ goto do_cmd_auto; ++ } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); +@@ -880,7 +883,7 @@ static void __init retbleed_select_mitigation(void) + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { + if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) + retbleed_mitigation = RETBLEED_MITIGATION_UNRET; +- else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) ++ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB)) + retbleed_mitigation = RETBLEED_MITIGATION_IBPB; + } + diff --git a/debian/patches/bugfix/x86/retbleed/0053-x86-kexec-Disable-RET-on-kexec.patch b/debian/patches/bugfix/x86/retbleed/0053-x86-kexec-Disable-RET-on-kexec.patch new file mode 100644 index 000000000..bbd191afb --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0053-x86-kexec-Disable-RET-on-kexec.patch @@ -0,0 +1,171 @@ +From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Date: Fri, 8 Jul 2022 19:10:11 +0200 +Subject: x86/kexec: Disable RET on kexec +Origin: https://git.kernel.org/linus/697977d8415d61f3acbc4ee6d564c9dcf0309507 + +All the invocations unroll to __x86_return_thunk and this file +must be PIC independent. + +This fixes kexec on 64-bit AMD boxes. 
+ + [ bp: Fix 32-bit build. ] + +Reported-by: Edward Tran <edward.tran@oracle.com> +Reported-by: Awais Tanveer <awais.tanveer@oracle.com> +Suggested-by: Ankur Arora <ankur.a.arora@oracle.com> +Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kernel/relocate_kernel_32.S | 25 +++++++++++++++++++------ + arch/x86/kernel/relocate_kernel_64.S | 23 +++++++++++++++++------ + 2 files changed, 36 insertions(+), 12 deletions(-) + +diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S +index fcc8a7699103..c7c4b1917336 100644 +--- a/arch/x86/kernel/relocate_kernel_32.S ++++ b/arch/x86/kernel/relocate_kernel_32.S +@@ -7,10 +7,12 @@ + #include <linux/linkage.h> + #include <asm/page_types.h> + #include <asm/kexec.h> ++#include <asm/nospec-branch.h> + #include <asm/processor-flags.h> + + /* +- * Must be relocatable PIC code callable as a C function ++ * Must be relocatable PIC code callable as a C function, in particular ++ * there must be a plain RET and not jump to return thunk. 
+ */ + + #define PTR(x) (x << 2) +@@ -91,7 +93,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) + movl %edi, %eax + addl $(identity_mapped - relocate_kernel), %eax + pushl %eax +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(relocate_kernel) + + SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) +@@ -159,12 +163,15 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) + xorl %edx, %edx + xorl %esi, %esi + xorl %ebp, %ebp +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + 1: + popl %edx + movl CP_PA_SWAP_PAGE(%edi), %esp + addl $PAGE_SIZE, %esp + 2: ++ ANNOTATE_RETPOLINE_SAFE + call *%edx + + /* get the re-entry point of the peer system */ +@@ -190,7 +197,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) + movl %edi, %eax + addl $(virtual_mapped - relocate_kernel), %eax + pushl %eax +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(identity_mapped) + + SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) +@@ -208,7 +217,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) + popl %edi + popl %esi + popl %ebx +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(virtual_mapped) + + /* Do the copies */ +@@ -271,7 +282,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) + popl %edi + popl %ebx + popl %ebp +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(swap_pages) + + .globl kexec_control_code_size +diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S +index c1d8626c53b6..4809c0dc4eb0 100644 +--- a/arch/x86/kernel/relocate_kernel_64.S ++++ b/arch/x86/kernel/relocate_kernel_64.S +@@ -13,7 +13,8 @@ + #include <asm/unwind_hints.h> + + /* +- * Must be relocatable PIC code callable as a C function ++ * Must be relocatable PIC code callable as a C function, in particular ++ * there must be a plain RET and not jump to return thunk. 
+ */ + + #define PTR(x) (x << 3) +@@ -105,7 +106,9 @@ SYM_CODE_START_NOALIGN(relocate_kernel) + /* jump to identity mapped page */ + addq $(identity_mapped - relocate_kernel), %r8 + pushq %r8 +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(relocate_kernel) + + SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) +@@ -200,7 +203,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) + xorl %r14d, %r14d + xorl %r15d, %r15d + +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + + 1: + popq %rdx +@@ -219,7 +224,9 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped) + call swap_pages + movq $virtual_mapped, %rax + pushq %rax +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(identity_mapped) + + SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) +@@ -241,7 +248,9 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped) + popq %r12 + popq %rbp + popq %rbx +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(virtual_mapped) + + /* Do the copies */ +@@ -298,7 +307,9 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) + lea PAGE_SIZE(%rax), %rsi + jmp 0b + 3: +- RET ++ ANNOTATE_UNRET_SAFE ++ ret ++ int3 + SYM_CODE_END(swap_pages) + + .globl kexec_control_code_size diff --git a/debian/patches/bugfix/x86/retbleed/0054-x86-speculation-Disable-RRSBA-behavior.patch b/debian/patches/bugfix/x86/retbleed/0054-x86-speculation-Disable-RRSBA-behavior.patch new file mode 100644 index 000000000..441a2a8e2 --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0054-x86-speculation-Disable-RRSBA-behavior.patch @@ -0,0 +1,157 @@ +From: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> +Date: Fri, 8 Jul 2022 13:36:09 -0700 +Subject: x86/speculation: Disable RRSBA behavior +Origin: https://git.kernel.org/linus/4ad3278df6fe2b0852b00d5757fc2ccd8e92c26e + +Some Intel processors may use alternate predictors for RETs on +RSB-underflow. This condition may be vulnerable to Branch History +Injection (BHI) and intramode-BTI. 
+ +Kernel earlier added spectre_v2 mitigation modes (eIBRS+Retpolines, +eIBRS+LFENCE, Retpolines) which protect indirect CALLs and JMPs against +such attacks. However, on RSB-underflow, RET target prediction may +fallback to alternate predictors. As a result, RET's predicted target +may get influenced by branch history. + +A new MSR_IA32_SPEC_CTRL bit (RRSBA_DIS_S) controls this fallback +behavior when in kernel mode. When set, RETs will not take predictions +from alternate predictors, hence mitigating RETs as well. Support for +this is enumerated by CPUID.7.2.EDX[RRSBA_CTRL] (bit2). + +For spectre v2 mitigation, when a user selects a mitigation that +protects indirect CALLs and JMPs against BHI and intramode-BTI, set +RRSBA_DIS_S also to protect RETs for RSB-underflow case. + +Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/include/asm/cpufeatures.h | 2 +- + arch/x86/include/asm/msr-index.h | 9 +++++++++ + arch/x86/kernel/cpu/bugs.c | 26 ++++++++++++++++++++++++++ + arch/x86/kernel/cpu/scattered.c | 1 + + tools/arch/x86/include/asm/msr-index.h | 9 +++++++++ + 5 files changed, 46 insertions(+), 1 deletion(-) + +diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h +index 7e8099fd5ec1..00f5227c8459 100644 +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -297,7 +297,7 @@ + #define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ + #define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ + #define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ +-/* FREE! 
(11*32+11) */ ++#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ + #define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ + #define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ + #define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ +diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h +index 7b411d9a6efb..cc615be27a54 100644 +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -51,6 +51,8 @@ + #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ + #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ + #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ ++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ ++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +@@ -141,6 +143,13 @@ + * bit available to control VERW + * behavior. + */ ++#define ARCH_CAP_RRSBA BIT(19) /* ++ * Indicates RET may use predictors ++ * other than the RSB. With eIBRS ++ * enabled predictions in kernel mode ++ * are restricted to targets in ++ * kernel. 
++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* +diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c +index d26c57d98b98..0dd04713434b 100644 +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1318,6 +1318,22 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) + return SPECTRE_V2_RETPOLINE; + } + ++/* Disable in-kernel use of non-RSB RET predictors */ ++static void __init spec_ctrl_disable_kernel_rrsba(void) ++{ ++ u64 ia32_cap; ++ ++ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) ++ return; ++ ++ ia32_cap = x86_read_arch_cap_msr(); ++ ++ if (ia32_cap & ARCH_CAP_RRSBA) { ++ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; ++ write_spec_ctrl_current(x86_spec_ctrl_base, true); ++ } ++} ++ + static void __init spectre_v2_select_mitigation(void) + { + enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); +@@ -1412,6 +1428,16 @@ static void __init spectre_v2_select_mitigation(void) + break; + } + ++ /* ++ * Disable alternate RSB predictions in kernel when indirect CALLs and ++ * JMPs gets protection against BHI and Intramode-BTI, but RET ++ * prediction from a non-RSB predictor is still a risk. 
++ */ ++ if (mode == SPECTRE_V2_EIBRS_LFENCE || ++ mode == SPECTRE_V2_EIBRS_RETPOLINE || ++ mode == SPECTRE_V2_RETPOLINE) ++ spec_ctrl_disable_kernel_rrsba(); ++ + spectre_v2_enabled = mode; + pr_info("%s\n", spectre_v2_strings[mode]); + +diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c +index dbaa8326d6f2..fd44b54c90d5 100644 +--- a/arch/x86/kernel/cpu/scattered.c ++++ b/arch/x86/kernel/cpu/scattered.c +@@ -27,6 +27,7 @@ static const struct cpuid_bit cpuid_bits[] = { + { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, + { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, ++ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, + { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, + { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, +diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h +index d27e0581b777..2eab6a3a8a8c 100644 +--- a/tools/arch/x86/include/asm/msr-index.h ++++ b/tools/arch/x86/include/asm/msr-index.h +@@ -51,6 +51,8 @@ + #define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ + #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ + #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ ++#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ ++#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) + + #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ + #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +@@ -140,6 +142,13 @@ + * bit available to control VERW + * behavior. + */ ++#define ARCH_CAP_RRSBA BIT(19) /* ++ * Indicates RET may use predictors ++ * other than the RSB. With eIBRS ++ * enabled predictions in kernel mode ++ * are restricted to targets in ++ * kernel. 
++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* diff --git a/debian/patches/bugfix/x86/retbleed/0055-x86-static_call-Serialize-__static_call_fixup-proper.patch b/debian/patches/bugfix/x86/retbleed/0055-x86-static_call-Serialize-__static_call_fixup-proper.patch new file mode 100644 index 000000000..0fb2a7efa --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0055-x86-static_call-Serialize-__static_call_fixup-proper.patch @@ -0,0 +1,70 @@ +From: Thomas Gleixner <tglx@linutronix.de> +Date: Tue, 12 Jul 2022 14:01:06 +0200 +Subject: x86/static_call: Serialize __static_call_fixup() properly +Origin: https://git.kernel.org/linus/c27c753ea6fd1237f4f96abf8b623d7bab505513 + +__static_call_fixup() invokes __static_call_transform() without holding +text_mutex, which causes lockdep to complain in text_poke_bp(). + +Adding the proper locking cures that, but as this is either used during +early boot or during module finalizing, it's not required to use +text_poke_bp(). Add an argument to __static_call_transform() which tells +it to use text_poke_early() for it. 
+ +Fixes: ee88d363d156 ("x86,static_call: Use alternative RET encoding") +Signed-off-by: Thomas Gleixner <tglx@linutronix.de> +Signed-off-by: Borislav Petkov <bp@suse.de> +--- + arch/x86/kernel/static_call.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c +index be7038a0da4d..aaaba85d6d7f 100644 +--- a/arch/x86/kernel/static_call.c ++++ b/arch/x86/kernel/static_call.c +@@ -25,7 +25,8 @@ static const u8 xor5rax[] = { 0x2e, 0x2e, 0x2e, 0x31, 0xc0 }; + + static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc }; + +-static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) ++static void __ref __static_call_transform(void *insn, enum insn_type type, ++ void *func, bool modinit) + { + const void *emulate = NULL; + int size = CALL_INSN_SIZE; +@@ -60,7 +61,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void + if (memcmp(insn, code, size) == 0) + return; + +- if (unlikely(system_state == SYSTEM_BOOTING)) ++ if (system_state == SYSTEM_BOOTING || modinit) + return text_poke_early(insn, code, size); + + text_poke_bp(insn, code, size, emulate); +@@ -114,12 +115,12 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) + + if (tramp) { + __static_call_validate(tramp, true, true); +- __static_call_transform(tramp, __sc_insn(!func, true), func); ++ __static_call_transform(tramp, __sc_insn(!func, true), func, false); + } + + if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) { + __static_call_validate(site, tail, false); +- __static_call_transform(site, __sc_insn(!func, tail), func); ++ __static_call_transform(site, __sc_insn(!func, tail), func, false); + } + + mutex_unlock(&text_mutex); +@@ -145,8 +146,10 @@ bool __static_call_fixup(void *tramp, u8 op, void *dest) + return false; + } + ++ mutex_lock(&text_mutex); + if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk) +- 
__static_call_transform(tramp, RET, NULL); ++ __static_call_transform(tramp, RET, NULL, true); ++ mutex_unlock(&text_mutex); + + return true; + } diff --git a/debian/patches/bugfix/x86/retbleed/0056-x86-asm-32-fix-annotate_unret_safe-use-on-32-bit.patch b/debian/patches/bugfix/x86/retbleed/0056-x86-asm-32-fix-annotate_unret_safe-use-on-32-bit.patch new file mode 100644 index 000000000..2ba7abb8a --- /dev/null +++ b/debian/patches/bugfix/x86/retbleed/0056-x86-asm-32-fix-annotate_unret_safe-use-on-32-bit.patch @@ -0,0 +1,36 @@ +From: Jiri Slaby <jslaby@suse.cz> +Date: Wed, 13 Jul 2022 11:50:46 +0200 +Subject: x86/asm/32: Fix ANNOTATE_UNRET_SAFE use on 32-bit +Origin: https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit?id=3131ef39fb03bbde237d0b8260445898f3dfda5b + +The build on x86_32 currently fails after commit + + 9bb2ec608a20 (objtool: Update Retpoline validation) + +with: + + arch/x86/kernel/../../x86/xen/xen-head.S:35: Error: no such instruction: `annotate_unret_safe' + +ANNOTATE_UNRET_SAFE is defined in nospec-branch.h. And head_32.S is +missing this include. Fix this. 
+ +Fixes: 9bb2ec608a20 ("objtool: Update Retpoline validation") +Signed-off-by: Jiri Slaby <jslaby@suse.cz> +Signed-off-by: Borislav Petkov <bp@suse.de> +Link: https://lore.kernel.org/r/63e23f80-033f-f64e-7522-2816debbc367@kernel.org +--- + arch/x86/kernel/head_32.S | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S +index eb8656bac99b..9b7acc9c7874 100644 +--- a/arch/x86/kernel/head_32.S ++++ b/arch/x86/kernel/head_32.S +@@ -23,6 +23,7 @@ + #include <asm/cpufeatures.h> + #include <asm/percpu.h> + #include <asm/nops.h> ++#include <asm/nospec-branch.h> + #include <asm/bootparam.h> + #include <asm/export.h> + #include <asm/pgtable_32.h> diff --git a/debian/patches/debian/add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by-default.patch b/debian/patches/debian/add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by-default.patch index de0cc102b..85188c67b 100644 --- a/debian/patches/debian/add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by-default.patch +++ b/debian/patches/debian/add-sysctl-to-disallow-unprivileged-CLONE_NEWUSER-by-default.patch @@ -16,7 +16,7 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> --- --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -108,6 +108,11 @@ +@@ -109,6 +109,11 @@ #define CREATE_TRACE_POINTS #include <trace/events/task.h> @@ -28,7 +28,7 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> /* * Minimum number of threads to boot the kernel -@@ -1923,6 +1928,10 @@ static __latent_entropy struct task_stru +@@ -1996,6 +2001,10 @@ static __latent_entropy struct task_stru if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); @@ -39,7 +39,7 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. 
-@@ -3027,6 +3036,12 @@ int ksys_unshare(unsigned long unshare_f +@@ -3144,6 +3153,12 @@ int ksys_unshare(unsigned long unshare_f if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; @@ -54,7 +54,7 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> goto bad_unshare_out; --- a/kernel/sysctl.c +++ b/kernel/sysctl.c -@@ -146,6 +146,10 @@ int sysctl_legacy_va_layout; +@@ -134,6 +134,10 @@ int sysctl_legacy_va_layout; static const int max_extfrag_threshold = 1000; #endif @@ -64,8 +64,8 @@ Signed-off-by: Serge Hallyn <serge.hallyn@ubuntu.com> + #endif /* CONFIG_SYSCTL */ - #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL) -@@ -1807,6 +1811,15 @@ static struct ctl_table kern_table[] = { + /* +@@ -1605,6 +1609,15 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, diff --git a/debian/patches/debian/dfsg/drivers-net-appletalk-cops.patch b/debian/patches/debian/dfsg/drivers-net-appletalk-cops.patch index 9a7884605..3bf7ca6b3 100644 --- a/debian/patches/debian/dfsg/drivers-net-appletalk-cops.patch +++ b/debian/patches/debian/dfsg/drivers-net-appletalk-cops.patch @@ -8,13 +8,11 @@ Forwarded: not-needed drivers/net/appletalk/Makefile | 1 - 2 files changed, 27 deletions(-) -Index: linux/drivers/net/appletalk/Kconfig -=================================================================== ---- linux.orig/drivers/net/appletalk/Kconfig -+++ linux/drivers/net/appletalk/Kconfig -@@ -50,35 +50,6 @@ config LTPC - This driver is experimental, which means that it may not work. - See the file <file:Documentation/networking/device_drivers/appletalk/ltpc.rst>. +--- a/drivers/net/appletalk/Kconfig ++++ b/drivers/net/appletalk/Kconfig +@@ -39,35 +39,6 @@ config DEV_APPLETALK + connect to the AppleTalk network, say Y. 
+ -config COPS - tristate "COPS LocalTalk PC support" @@ -48,13 +46,10 @@ Index: linux/drivers/net/appletalk/Kconfig config IPDDP tristate "Appletalk-IP driver support" depends on DEV_APPLETALK && ATALK -Index: linux/drivers/net/appletalk/Makefile -=================================================================== ---- linux.orig/drivers/net/appletalk/Makefile -+++ linux/drivers/net/appletalk/Makefile -@@ -4,5 +4,4 @@ +--- a/drivers/net/appletalk/Makefile ++++ b/drivers/net/appletalk/Makefile +@@ -4,4 +4,3 @@ # obj-$(CONFIG_IPDDP) += ipddp.o -obj-$(CONFIG_COPS) += cops.o - obj-$(CONFIG_LTPC) += ltpc.o diff --git a/debian/patches/debian/export-symbols-needed-by-android-drivers.patch b/debian/patches/debian/export-symbols-needed-by-android-drivers.patch index 83069715b..dd0fc6e22 100644 --- a/debian/patches/debian/export-symbols-needed-by-android-drivers.patch +++ b/debian/patches/debian/export-symbols-needed-by-android-drivers.patch @@ -22,7 +22,7 @@ Export the currently un-exported symbols they depend on. --- a/kernel/fork.c +++ b/kernel/fork.c -@@ -1151,6 +1151,7 @@ void mmput_async(struct mm_struct *mm) +@@ -1225,6 +1225,7 @@ void mmput_async(struct mm_struct *mm) schedule_work(&mm->async_put_work); } } @@ -32,7 +32,7 @@ Export the currently un-exported symbols they depend on. /** --- a/kernel/sched/core.c +++ b/kernel/sched/core.c -@@ -6912,6 +6912,7 @@ int can_nice(const struct task_struct *p +@@ -7010,6 +7010,7 @@ int can_nice(const struct task_struct *p return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || capable(CAP_SYS_NICE)); } @@ -42,7 +42,7 @@ Export the currently un-exported symbols they depend on. --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c -@@ -244,6 +244,7 @@ void __wake_up_pollfree(struct wait_queu +@@ -243,6 +243,7 @@ void __wake_up_pollfree(struct wait_queu /* POLLFREE must have cleared the queue. */ WARN_ON_ONCE(waitqueue_active(wq_head)); } @@ -52,7 +52,7 @@ Export the currently un-exported symbols they depend on. 
* Note: we use "set_current_state()" _after_ the wait-queue add, --- a/kernel/task_work.c +++ b/kernel/task_work.c -@@ -60,6 +60,7 @@ int task_work_add(struct task_struct *ta +@@ -73,6 +73,7 @@ int task_work_add(struct task_struct *ta return 0; } @@ -62,7 +62,7 @@ Export the currently un-exported symbols they depend on. * task_work_cancel_match - cancel a pending work added by task_work_add() --- a/mm/memory.c +++ b/mm/memory.c -@@ -1641,6 +1641,7 @@ void zap_page_range(struct vm_area_struc +@@ -1749,6 +1749,7 @@ void zap_page_range(struct vm_area_struc mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); } @@ -72,7 +72,7 @@ Export the currently un-exported symbols they depend on. * zap_page_range_single - remove user pages in a given range --- a/mm/shmem.c +++ b/mm/shmem.c -@@ -4141,6 +4141,7 @@ int shmem_zero_setup(struct vm_area_stru +@@ -4175,6 +4175,7 @@ int shmem_zero_setup(struct vm_area_stru return 0; } @@ -82,7 +82,7 @@ Export the currently un-exported symbols they depend on. * shmem_read_mapping_page_gfp - read into page cache, using specified page allocation flags. --- a/security/security.c +++ b/security/security.c -@@ -751,24 +751,28 @@ int security_binder_set_context_mgr(cons +@@ -752,24 +752,28 @@ int security_binder_set_context_mgr(cons { return call_int_hook(binder_set_context_mgr, 0, mgr); } @@ -113,9 +113,9 @@ Export the currently un-exported symbols they depend on. 
{ --- a/fs/file.c +++ b/fs/file.c -@@ -804,6 +804,7 @@ int close_fd_get_file(unsigned int fd, s +@@ -813,6 +813,7 @@ struct file *close_fd_get_file(unsigned - return ret; + return file; } +EXPORT_SYMBOL_GPL(close_fd_get_file); diff --git a/debian/patches/debian/kbuild-look-for-module.lds-under-arch-directory-too.patch b/debian/patches/debian/kbuild-look-for-module.lds-under-arch-directory-too.patch index 7a87726ce..567214d4e 100644 --- a/debian/patches/debian/kbuild-look-for-module.lds-under-arch-directory-too.patch +++ b/debian/patches/debian/kbuild-look-for-module.lds-under-arch-directory-too.patch @@ -22,10 +22,8 @@ Therefore, we move module.lds under the arch build directory in rules.real and change Makefile.modfinal to look for it in both places. --- -Index: linux/scripts/Makefile.modfinal -=================================================================== ---- linux.orig/scripts/Makefile.modfinal -+++ linux/scripts/Makefile.modfinal +--- a/scripts/Makefile.modfinal ++++ b/scripts/Makefile.modfinal @@ -29,12 +29,13 @@ quiet_cmd_cc_o_c = CC [M] $@ $(call if_changed_dep,cc_o_c) @@ -42,11 +40,11 @@ Index: linux/scripts/Makefile.modfinal quiet_cmd_btf_ko = BTF [M] $@ @@ -55,7 +56,7 @@ if_changed_except = $(if $(call newer_pr - + printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:) # Re-generate module BTFs if either module's .ko or vmlinux changed --$(modules): %.ko: %$(mod-prelink-ext).o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE -+$(modules): %.ko: %$(mod-prelink-ext).o %.mod.o $(ARCH_MODULE_LDS) $(if $(KBUILD_BUILTIN),vmlinux) FORCE +-$(modules): %.ko: %.o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE ++$(modules): %.ko: %.o %.mod.o $(ARCH_MODULE_LDS) $(if $(KBUILD_BUILTIN),vmlinux) FORCE +$(call if_changed_except,ld_ko_o,vmlinux) ifdef CONFIG_DEBUG_INFO_BTF_MODULES +$(if $(newer-prereqs),$(call cmd,btf_ko)) diff --git a/debian/patches/debian/version.patch b/debian/patches/debian/version.patch index 
8ad14de0d..3e845533f 100644 --- a/debian/patches/debian/version.patch +++ b/debian/patches/debian/version.patch @@ -15,11 +15,9 @@ are set. lib/dump_stack.c | 6 ++++-- 6 files changed, 33 insertions(+), 11 deletions(-) -Index: linux/Makefile -=================================================================== ---- linux.orig/Makefile -+++ linux/Makefile -@@ -1187,7 +1187,7 @@ PHONY += prepare archprepare +--- a/Makefile ++++ b/Makefile +@@ -1193,7 +1193,7 @@ PHONY += prepare archprepare archprepare: outputmakefile archheaders archscripts scripts include/config/kernel.release \ asm-generic $(version_h) $(autoksyms_h) include/generated/utsrelease.h \ @@ -28,7 +26,7 @@ Index: linux/Makefile prepare0: archprepare $(Q)$(MAKE) $(build)=scripts/mod -@@ -1241,6 +1241,16 @@ define filechk_version.h +@@ -1247,6 +1247,16 @@ define filechk_version.h echo \#define LINUX_VERSION_SUBLEVEL $(SUBLEVEL) endef @@ -45,7 +43,7 @@ Index: linux/Makefile $(version_h): PATCHLEVEL := $(or $(PATCHLEVEL), 0) $(version_h): SUBLEVEL := $(or $(SUBLEVEL), 0) $(version_h): FORCE -@@ -1249,6 +1259,9 @@ $(version_h): FORCE +@@ -1255,6 +1265,9 @@ $(version_h): FORCE include/generated/utsrelease.h: include/config/kernel.release FORCE $(call filechk,utsrelease.h) @@ -55,11 +53,9 @@ Index: linux/Makefile PHONY += headerdep headerdep: $(Q)find $(srctree)/include/ -name '*.h' | xargs --max-args 1 \ -Index: linux/arch/ia64/kernel/process.c -=================================================================== ---- linux.orig/arch/ia64/kernel/process.c -+++ linux/arch/ia64/kernel/process.c -@@ -34,6 +34,7 @@ +--- a/arch/ia64/kernel/process.c ++++ b/arch/ia64/kernel/process.c +@@ -35,6 +35,7 @@ #include <linux/utsname.h> #include <linux/resume_user_mode.h> #include <linux/rcupdate.h> @@ -67,7 +63,7 @@ Index: linux/arch/ia64/kernel/process.c #include <asm/cpu.h> #include <asm/delay.h> -@@ -101,9 +102,9 @@ show_regs (struct pt_regs *regs) +@@ -102,9 +103,9 @@ show_regs (struct pt_regs *regs) print_modules(); 
printk("\n"); show_regs_print_info(KERN_DEFAULT); @@ -79,19 +75,17 @@ Index: linux/arch/ia64/kernel/process.c printk("ip is at %pS\n", (void *)ip); printk("unat: %016lx pfs : %016lx rsc : %016lx\n", regs->ar_unat, regs->ar_pfs, regs->ar_rsc); -Index: linux/arch/powerpc/kernel/process.c -=================================================================== ---- linux.orig/arch/powerpc/kernel/process.c -+++ linux/arch/powerpc/kernel/process.c -@@ -40,6 +40,7 @@ - #include <linux/elf-randomize.h> +--- a/arch/powerpc/kernel/process.c ++++ b/arch/powerpc/kernel/process.c +@@ -38,6 +38,7 @@ + #include <linux/uaccess.h> #include <linux/pkeys.h> #include <linux/seq_buf.h> +#include <generated/package.h> #include <asm/interrupt.h> #include <asm/io.h> -@@ -1523,8 +1524,9 @@ static void __show_regs(struct pt_regs * +@@ -1520,8 +1521,9 @@ static void __show_regs(struct pt_regs * printk("NIP: "REG" LR: "REG" CTR: "REG"\n", regs->nip, regs->link, regs->ctr); @@ -103,10 +97,8 @@ Index: linux/arch/powerpc/kernel/process.c printk("MSR: "REG" ", regs->msr); print_msr_bits(regs->msr); pr_cont(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer); -Index: linux/arch/x86/um/sysrq_64.c -=================================================================== ---- linux.orig/arch/x86/um/sysrq_64.c -+++ linux/arch/x86/um/sysrq_64.c +--- a/arch/x86/um/sysrq_64.c ++++ b/arch/x86/um/sysrq_64.c @@ -9,6 +9,7 @@ #include <linux/sched.h> #include <linux/sched/debug.h> @@ -127,10 +119,8 @@ Index: linux/arch/x86/um/sysrq_64.c printk(KERN_INFO "RIP: %04lx:[<%016lx>]\n", PT_REGS_CS(regs) & 0xffff, PT_REGS_IP(regs)); printk(KERN_INFO "RSP: %016lx EFLAGS: %08lx\n", PT_REGS_SP(regs), -Index: linux/kernel/hung_task.c -=================================================================== ---- linux.orig/kernel/hung_task.c -+++ linux/kernel/hung_task.c +--- a/kernel/hung_task.c ++++ b/kernel/hung_task.c @@ -24,6 +24,7 @@ #include <linux/sched/sysctl.h> @@ -139,7 +129,7 @@ Index: linux/kernel/hung_task.c /* * The number 
of tasks checked: -@@ -131,10 +132,11 @@ static void check_hung_task(struct task_ +@@ -133,10 +134,11 @@ static void check_hung_task(struct task_ sysctl_hung_task_warnings--; pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", t->comm, t->pid, (jiffies - t->last_switch_time) / HZ); @@ -153,10 +143,8 @@ Index: linux/kernel/hung_task.c pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" " disables this message.\n"); sched_show_task(t); -Index: linux/lib/dump_stack.c -=================================================================== ---- linux.orig/lib/dump_stack.c -+++ linux/lib/dump_stack.c +--- a/lib/dump_stack.c ++++ b/lib/dump_stack.c @@ -14,6 +14,7 @@ #include <linux/kexec.h> #include <linux/utsname.h> diff --git a/debian/patches/features/all/db-mok-keyring/0003-MODSIGN-checking-the-blacklisted-hash-before-loading-a-kernel-module.patch b/debian/patches/features/all/db-mok-keyring/0003-MODSIGN-checking-the-blacklisted-hash-before-loading-a-kernel-module.patch index a7ec5ccb5..0b4b91676 100644 --- a/debian/patches/features/all/db-mok-keyring/0003-MODSIGN-checking-the-blacklisted-hash-before-loading-a-kernel-module.patch +++ b/debian/patches/features/all/db-mok-keyring/0003-MODSIGN-checking-the-blacklisted-hash-before-loading-a-kernel-module.patch @@ -20,19 +20,28 @@ Cc: Josh Boyer <jwboyer@fedoraproject.org> Cc: James Bottomley <James.Bottomley@HansenPartnership.com> Signed-off-by: "Lee, Chun-Yi" <jlee@suse.com> [Rebased by Luca Boccassi] +[bwh: Forward-ported to 5.19: + - The type parameter to is_hash_blacklisted() is now an enumeration + rather than a string + - Adjust filename, context] --- - kernel/module_signing.c | 59 +++++++++++++++++++++++++++++++++++++++-- + kernel/module/signing.c | 59 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) ---- a/kernel/module_signing.c -+++ b/kernel/module_signing.c -@@ -12,15 +12,62 @@ - #include <linux/string.h> +--- a/kernel/module/signing.c ++++ 
b/kernel/module/signing.c +@@ -13,6 +13,8 @@ #include <linux/verification.h> + #include <linux/security.h> #include <crypto/public_key.h> +#include <crypto/hash.h> +#include <keys/system_keyring.h> - #include "module-internal.h" + #include <uapi/linux/module.h> + #include "internal.h" + +@@ -37,13 +39,58 @@ void set_module_sig_enforced(void) + sig_enforce = true; + } +static int mod_is_hash_blacklisted(const void *mod, size_t verifylen) +{ @@ -66,7 +75,7 @@ Signed-off-by: "Lee, Chun-Yi" <jlee@suse.com> + + pr_debug("%ld digest: %*phN\n", verifylen, (int) digest_size, digest); + -+ ret = is_hash_blacklisted(digest, digest_size, "bin"); ++ ret = is_hash_blacklisted(digest, digest_size, BLACKLIST_HASH_BINARY); + if (ret == -EKEYREJECTED) + pr_err("Module hash %*phN is blacklisted\n", + (int) digest_size, digest); @@ -90,7 +99,7 @@ Signed-off-by: "Lee, Chun-Yi" <jlee@suse.com> int ret; pr_devel("==>%s(,%zu)\n", __func__, modlen); -@@ -28,6 +75,7 @@ int mod_verify_sig(const void *mod, stru +@@ -51,6 +98,7 @@ int mod_verify_sig(const void *mod, stru if (modlen <= sizeof(ms)) return -EBADMSG; @@ -98,7 +107,7 @@ Signed-off-by: "Lee, Chun-Yi" <jlee@suse.com> memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms)); ret = mod_check_sig(&ms, modlen, "module"); -@@ -38,8 +86,15 @@ int mod_verify_sig(const void *mod, stru +@@ -61,10 +109,17 @@ int mod_verify_sig(const void *mod, stru modlen -= sig_len + sizeof(ms); info->len = modlen; @@ -115,3 +124,5 @@ Signed-off-by: "Lee, Chun-Yi" <jlee@suse.com> + + return ret; } + + int module_sig_check(struct load_info *info, int flags) diff --git a/debian/patches/features/all/db-mok-keyring/KEYS-Make-use-of-platform-keyring-for-module-signature.patch b/debian/patches/features/all/db-mok-keyring/KEYS-Make-use-of-platform-keyring-for-module-signature.patch index f00bf3d24..8d6df53d2 100644 --- a/debian/patches/features/all/db-mok-keyring/KEYS-Make-use-of-platform-keyring-for-module-signature.patch +++ 
b/debian/patches/features/all/db-mok-keyring/KEYS-Make-use-of-platform-keyring-for-module-signature.patch @@ -15,13 +15,14 @@ were not successfully verified. Signed-off-by: Robert Holmes <robeholmes@gmail.com> Signed-off-by: Jeremy Cline <jcline@redhat.com> +[bwh: Forward-ported to 5.19: adjust filename] --- - kernel/module_signing.c | 16 ++++++++++++---- + kernel/module/signing.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) ---- a/kernel/module_signing.c -+++ b/kernel/module_signing.c -@@ -135,6 +135,13 @@ int mod_verify_sig(const void *mod, stru +--- a/kernel/module/signing.c ++++ b/kernel/module/signing.c +@@ -116,6 +116,13 @@ int mod_verify_sig(const void *mod, stru VERIFYING_MODULE_SIGNATURE, NULL, NULL); pr_devel("verify_pkcs7_signature() = %d\n", ret); diff --git a/debian/patches/features/all/db-mok-keyring/modsign-make-shash-allocation-failure-fatal.patch b/debian/patches/features/all/db-mok-keyring/modsign-make-shash-allocation-failure-fatal.patch index 2ae3ddde4..5485dfe65 100644 --- a/debian/patches/features/all/db-mok-keyring/modsign-make-shash-allocation-failure-fatal.patch +++ b/debian/patches/features/all/db-mok-keyring/modsign-make-shash-allocation-failure-fatal.patch @@ -8,9 +8,9 @@ so unwrap and pass up the error code. 
Signed-off-by: Ben Hutchings <ben@decadent.org.uk> --- ---- a/kernel/module_signing.c -+++ b/kernel/module_signing.c -@@ -51,11 +51,13 @@ static int mod_is_hash_blacklisted(const +--- a/kernel/module/signing.c ++++ b/kernel/module/signing.c +@@ -45,11 +45,13 @@ static int mod_is_hash_blacklisted(const struct shash_desc *desc; size_t digest_size, desc_size; u8 *digest; diff --git a/debian/patches/features/all/lockdown/mtd-disable-slram-and-phram-when-locked-down.patch b/debian/patches/features/all/lockdown/mtd-disable-slram-and-phram-when-locked-down.patch index 124b932c8..c718e7e2f 100644 --- a/debian/patches/features/all/lockdown/mtd-disable-slram-and-phram-when-locked-down.patch +++ b/debian/patches/features/all/lockdown/mtd-disable-slram-and-phram-when-locked-down.patch @@ -23,7 +23,7 @@ Cc: linux-mtd@lists.infradead.org --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c -@@ -317,7 +317,11 @@ +@@ -364,7 +364,11 @@ static int phram_param_call(const char * #endif } @@ -35,7 +35,7 @@ Cc: linux-mtd@lists.infradead.org + 0200, -1, KERNEL_PARAM_FL_HWPARAM | hwparam_iomem); MODULE_PARM_DESC(phram, "Memory region to map. \"phram=<name>,<start>,<length>[,<erasesize>]\""); - + #ifdef CONFIG_OF --- a/drivers/mtd/devices/slram.c +++ b/drivers/mtd/devices/slram.c @@ -43,6 +43,7 @@ @@ -46,7 +46,7 @@ Cc: linux-mtd@lists.infradead.org #include <linux/mtd/mtd.h> -@@ -65,7 +66,7 @@ +@@ -65,7 +66,7 @@ typedef struct slram_mtd_list { #ifdef MODULE static char *map[SLRAM_MAX_DEVICES_PARAMS]; @@ -55,7 +55,7 @@ Cc: linux-mtd@lists.infradead.org MODULE_PARM_DESC(map, "List of memory regions to map. 
\"map=<name>, <start>, <length / end>\""); #else static char *map; -@@ -281,11 +282,17 @@ +@@ -281,11 +282,17 @@ static int __init init_slram(void) #ifndef MODULE char *devstart; char *devlength; diff --git a/debian/patches/series b/debian/patches/series index 550311549..2a2f2fb61 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -99,6 +99,62 @@ features/all/db-mok-keyring/KEYS-Make-use-of-platform-keyring-for-module-signatu # Security fixes debian/i386-686-pae-pci-set-pci-nobios-by-default.patch debian/ntfs-mark-it-as-broken.patch +bugfix/x86/retbleed/0001-x86-kvm-vmx-Make-noinstr-clean.patch +bugfix/x86/retbleed/0002-x86-cpufeatures-Move-RETPOLINE-flags-to-word-11.patch +bugfix/x86/retbleed/0003-x86-retpoline-Cleanup-some-ifdefery.patch +bugfix/x86/retbleed/0004-x86-retpoline-Swizzle-retpoline-thunk.patch +bugfix/x86/retbleed/0005-x86-retpoline-Use-mfunction-return.patch +bugfix/x86/retbleed/0006-x86-Undo-return-thunk-damage.patch +bugfix/x86/retbleed/0007-x86-objtool-Create-.return_sites.patch +bugfix/x86/retbleed/0008-x86-static_call-Use-alternative-RET-encoding.patch +bugfix/x86/retbleed/0009-x86-ftrace-Use-alternative-RET-encoding.patch +bugfix/x86/retbleed/0010-x86-bpf-Use-alternative-RET-encoding.patch +bugfix/x86/retbleed/0011-x86-kvm-Fix-SETcc-emulation-for-return-thunks.patch +bugfix/x86/retbleed/0012-x86-vsyscall_emu-64-Don-t-use-RET-in-vsyscall-emulat.patch +bugfix/x86/retbleed/0013-x86-sev-Avoid-using-__x86_return_thunk.patch +bugfix/x86/retbleed/0014-x86-Use-return-thunk-in-asm-code.patch +bugfix/x86/retbleed/0015-x86-entry-Avoid-very-early-RET.patch +bugfix/x86/retbleed/0016-objtool-Treat-.text.__x86.-as-noinstr.patch +bugfix/x86/retbleed/0017-x86-Add-magic-AMD-return-thunk.patch +bugfix/x86/retbleed/0018-x86-bugs-Report-AMD-retbleed-vulnerability.patch +bugfix/x86/retbleed/0019-x86-bugs-Add-AMD-retbleed-boot-parameter.patch +bugfix/x86/retbleed/0020-x86-bugs-Enable-STIBP-for-JMP2RET.patch 
+bugfix/x86/retbleed/0021-x86-bugs-Keep-a-per-CPU-IA32_SPEC_CTRL-value.patch +bugfix/x86/retbleed/0022-x86-entry-Add-kernel-IBRS-implementation.patch +bugfix/x86/retbleed/0023-x86-bugs-Optimize-SPEC_CTRL-MSR-writes.patch +bugfix/x86/retbleed/0024-x86-speculation-Add-spectre_v2-ibrs-option-to-suppor.patch +bugfix/x86/retbleed/0025-x86-bugs-Split-spectre_v2_select_mitigation-and-spec.patch +bugfix/x86/retbleed/0026-x86-bugs-Report-Intel-retbleed-vulnerability.patch +bugfix/x86/retbleed/0027-intel_idle-Disable-IBRS-during-long-idle.patch +bugfix/x86/retbleed/0028-objtool-Update-Retpoline-validation.patch +bugfix/x86/retbleed/0029-x86-xen-Rename-SYS-entry-points.patch +bugfix/x86/retbleed/0030-x86-xen-Add-UNTRAIN_RET.patch +bugfix/x86/retbleed/0031-x86-bugs-Add-retbleed-ibpb.patch +bugfix/x86/retbleed/0032-x86-bugs-Do-IBPB-fallback-check-only-once.patch +bugfix/x86/retbleed/0033-objtool-Add-entry-UNRET-validation.patch +bugfix/x86/retbleed/0034-x86-cpu-amd-Add-Spectral-Chicken.patch +bugfix/x86/retbleed/0035-x86-speculation-Fix-RSB-filling-with-CONFIG_RETPOLIN.patch +bugfix/x86/retbleed/0036-x86-speculation-Fix-firmware-entry-SPEC_CTRL-handlin.patch +bugfix/x86/retbleed/0037-x86-speculation-Fix-SPEC_CTRL-write-on-SMT-state-cha.patch +bugfix/x86/retbleed/0038-x86-speculation-Use-cached-host-SPEC_CTRL-value-for-.patch +bugfix/x86/retbleed/0039-x86-speculation-Remove-x86_spec_ctrl_mask.patch +bugfix/x86/retbleed/0040-objtool-Re-add-UNWIND_HINT_-SAVE_RESTORE.patch +bugfix/x86/retbleed/0041-KVM-VMX-Flatten-__vmx_vcpu_run.patch +bugfix/x86/retbleed/0042-KVM-VMX-Convert-launched-argument-to-flags.patch +bugfix/x86/retbleed/0043-KVM-VMX-Prevent-guest-RSB-poisoning-attacks-with-eIB.patch +bugfix/x86/retbleed/0044-KVM-VMX-Fix-IBRS-handling-after-vmexit.patch +bugfix/x86/retbleed/0045-x86-speculation-Fill-RSB-on-vmexit-for-IBRS.patch +bugfix/x86/retbleed/0046-KVM-VMX-Prevent-RSB-underflow-before-vmenter.patch 
+bugfix/x86/retbleed/0047-x86-common-Stamp-out-the-stepping-madness.patch +bugfix/x86/retbleed/0048-x86-cpu-amd-Enumerate-BTC_NO.patch +bugfix/x86/retbleed/0049-x86-retbleed-Add-fine-grained-Kconfig-knobs.patch +bugfix/x86/retbleed/0050-x86-bugs-Add-Cannon-lake-to-RETBleed-affected-CPU-li.patch +bugfix/x86/retbleed/0051-x86-entry-Move-PUSH_AND_CLEAR_REGS-back-into-error_e.patch +bugfix/x86/retbleed/0052-x86-bugs-Do-not-enable-IBPB-on-entry-when-IBPB-is-no.patch +bugfix/x86/retbleed/0053-x86-kexec-Disable-RET-on-kexec.patch +bugfix/x86/retbleed/0054-x86-speculation-Disable-RRSBA-behavior.patch +bugfix/x86/retbleed/0055-x86-static_call-Serialize-__static_call_fixup-proper.patch +bugfix/x86/retbleed/0056-x86-asm-32-fix-annotate_unret_safe-use-on-32-bit.patch # Fix exported symbol versions bugfix/all/module-disable-matching-missing-version-crc.patch |