154 lines
5.2 KiB
Diff
154 lines
5.2 KiB
Diff
From e35c7b4be89104580fdcc7402ac2d1bc726df177 Mon Sep 17 00:00:00 2001
|
|
From: Oleg Nesterov <oleg@redhat.com>
|
|
Date: Tue, 14 Jul 2015 14:26:34 +0200
|
|
Subject: [PATCH 089/365] signal/x86: Delay calling signals in atomic
|
|
|
|
On x86_64 we must disable preemption before we enable interrupts
|
|
for stack faults, int3 and debugging, because the current task is using
|
|
a per-CPU debug stack defined by the IST. If we schedule out, another task
|
|
can come in and use the same stack and cause the stack to be corrupted
|
|
and crash the kernel on return.
|
|
|
|
When CONFIG_PREEMPT_RT_FULL is enabled, spin_locks become mutexes, and
|
|
one of these is the spin lock used in signal handling.
|
|
|
|
Some of the debug code (int3) causes do_trap() to send a signal.
|
|
This function takes a spin lock that has been converted to a mutex
|
|
and has the possibility to sleep. If this happens, the above issue with
|
|
the corrupted stack is possible.
|
|
|
|
Instead of sending the signal right away, for PREEMPT_RT and x86_64,
|
|
the signal information is stored in the task's task_struct and
|
|
TIF_NOTIFY_RESUME is set. Then on exit of the trap, the signal resume
|
|
code will send the signal when preemption is enabled.
|
|
|
|
[ rostedt: Switched from #ifdef CONFIG_PREEMPT_RT_FULL to
|
|
ARCH_RT_DELAYS_SIGNAL_SEND and added comments to the code. ]
|
|
|
|
|
|
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
|
|
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
|
|
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
|
|
---
|
|
arch/x86/entry/common.c | 7 +++++++
|
|
arch/x86/include/asm/signal.h | 13 ++++++++++++
|
|
include/linux/sched.h | 4 ++++
|
|
kernel/signal.c | 37 +++++++++++++++++++++++++++++++++--
|
|
4 files changed, 59 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
|
|
index 8841d016b4a4..0b8da1cd26ca 100644
|
|
--- a/arch/x86/entry/common.c
|
|
+++ b/arch/x86/entry/common.c
|
|
@@ -150,6 +150,13 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
|
|
if (cached_flags & _TIF_NEED_RESCHED)
|
|
schedule();
|
|
|
|
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
|
|
+ if (unlikely(current->forced_info.si_signo)) {
|
|
+ struct task_struct *t = current;
|
|
+ force_sig_info(t->forced_info.si_signo, &t->forced_info, t);
|
|
+ t->forced_info.si_signo = 0;
|
|
+ }
|
|
+#endif
|
|
if (cached_flags & _TIF_UPROBE)
|
|
uprobe_notify_resume(regs);
|
|
|
|
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
|
|
index 8af22be0fe61..c370fba8c38b 100644
|
|
--- a/arch/x86/include/asm/signal.h
|
|
+++ b/arch/x86/include/asm/signal.h
|
|
@@ -27,6 +27,19 @@ typedef struct {
|
|
#define SA_IA32_ABI 0x02000000u
|
|
#define SA_X32_ABI 0x01000000u
|
|
|
|
+/*
|
|
+ * Because some traps use the IST stack, we must keep preemption
|
|
+ * disabled while calling do_trap(), but do_trap() may call
|
|
+ * force_sig_info() which will grab the signal spin_locks for the
|
|
+ * task, which in PREEMPT_RT_FULL are mutexes. By defining
|
|
+ * ARCH_RT_DELAYS_SIGNAL_SEND the force_sig_info() will set
|
|
+ * TIF_NOTIFY_RESUME and set up the signal to be sent on exit of the
|
|
+ * trap.
|
|
+ */
|
|
+#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_X86_64)
|
|
+#define ARCH_RT_DELAYS_SIGNAL_SEND
|
|
+#endif
|
|
+
|
|
#ifndef CONFIG_COMPAT
|
|
typedef sigset_t compat_sigset_t;
|
|
#endif
|
|
diff --git a/include/linux/sched.h b/include/linux/sched.h
|
|
index a6597145763e..a4ba30296e3f 100644
|
|
--- a/include/linux/sched.h
|
|
+++ b/include/linux/sched.h
|
|
@@ -1875,6 +1875,10 @@ struct task_struct {
|
|
sigset_t blocked, real_blocked;
|
|
sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
|
|
struct sigpending pending;
|
|
+#ifdef CONFIG_PREEMPT_RT_FULL
|
|
+ /* TODO: move me into ->restart_block ? */
|
|
+ struct siginfo forced_info;
|
|
+#endif
|
|
|
|
unsigned long sas_ss_sp;
|
|
size_t sas_ss_size;
|
|
diff --git a/kernel/signal.c b/kernel/signal.c
|
|
index 77354d135ea9..aca4b4cd11a2 100644
|
|
--- a/kernel/signal.c
|
|
+++ b/kernel/signal.c
|
|
@@ -1284,8 +1284,8 @@ int do_send_sig_info(int sig, struct siginfo *info, struct task_struct *p,
|
|
* We don't want to have recursive SIGSEGV's etc, for example,
|
|
* that is why we also clear SIGNAL_UNKILLABLE.
|
|
*/
|
|
-int
|
|
-force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
|
|
+static int
|
|
+do_force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
|
|
{
|
|
unsigned long int flags;
|
|
int ret, blocked, ignored;
|
|
@@ -1310,6 +1310,39 @@ force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
|
|
return ret;
|
|
}
|
|
|
|
+int force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
|
|
+{
|
|
+/*
|
|
+ * On some archs, PREEMPT_RT has to delay sending a signal from a trap
|
|
+ * since it can not enable preemption, and the signal code's spin_locks
|
|
+ * turn into mutexes. Instead, it must set TIF_NOTIFY_RESUME which will
|
|
+ * send the signal on exit of the trap.
|
|
+ */
|
|
+#ifdef ARCH_RT_DELAYS_SIGNAL_SEND
|
|
+ if (in_atomic()) {
|
|
+ if (WARN_ON_ONCE(t != current))
|
|
+ return 0;
|
|
+ if (WARN_ON_ONCE(t->forced_info.si_signo))
|
|
+ return 0;
|
|
+
|
|
+ if (is_si_special(info)) {
|
|
+ WARN_ON_ONCE(info != SEND_SIG_PRIV);
|
|
+ t->forced_info.si_signo = sig;
|
|
+ t->forced_info.si_errno = 0;
|
|
+ t->forced_info.si_code = SI_KERNEL;
|
|
+ t->forced_info.si_pid = 0;
|
|
+ t->forced_info.si_uid = 0;
|
|
+ } else {
|
|
+ t->forced_info = *info;
|
|
+ }
|
|
+
|
|
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
|
|
+ return 0;
|
|
+ }
|
|
+#endif
|
|
+ return do_force_sig_info(sig, info, t);
|
|
+}
|
|
+
|
|
/*
|
|
* Nuke all other threads in the group.
|
|
*/
|
|
--
|
|
2.28.0
|
|
|