From b7fd8d8127fadb1d1bc6c85b2174ab93b06730d0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 22 Mar 2017 11:35:54 +0100
Subject: [PATCH 011/365] futex: Rework inconsistent rt_mutex/futex_q state

Upstream commit 73d786bd043ebc855f349c81ea805f6b11cbf2aa

There is a weird state in the futex_unlock_pi() path when it interleaves
with a concurrent futex_lock_pi() at the point where it drops hb->lock.

In this case, it can happen that the rt_mutex wait_list and the futex_q
disagree on pending waiters; in particular, rt_mutex will find no pending
waiters where futex_q thinks there are. When that happens, the rt_mutex
unlock code cannot assign an owner.
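
Schematically, the unlock path then observes (an illustrative sketch,
not part of this patch; rt_mutex_next_owner() appears in the hunk below,
futex_top_waiter() and the local variables are assumed from kernel/futex.c):

	/* futex_q side: the hash bucket still shows a pending waiter... */
	top_waiter = futex_top_waiter(hb, &key);

	/* ...but the rt_mutex side finds its wait_list empty */
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/* new_owner == NULL: the unlock code has nobody to assign as owner */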

The futex side fixup code has to clean up the inconsistencies with quite
a bunch of interesting corner cases.

Simplify all this by changing wake_futex_pi() to return -EAGAIN when this
situation occurs. This then gives the futex_lock_pi() code the opportunity
to continue and the retried futex_unlock_pi() will now observe a coherent
state.
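
As a sketch of the resulting control flow (hypothetical caller shape,
not the literal futex_unlock_pi() code):

	ret = wake_futex_pi(uaddr, uval, top_waiter, hb);
	if (ret == -EAGAIN) {
		/*
		 * Transient rt_mutex/futex_q disagreement: back off,
		 * let the concurrent futex_lock_pi() either block on
		 * the rt_mutex or dequeue itself, then try again.
		 */
		goto retry;	/* re-read uval and re-take hb->lock */
	}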

The only problem is that this breaks RT timeliness guarantees. That
is, consider the following scenario:

 T1 and T2 are both pinned to CPU0. prio(T2) > prio(T1)

    CPU0

    T1
      lock_pi()
      queue_me()  <- Waiter is visible

    preemption

    T2
      unlock_pi()
        loops with -EAGAIN forever

Which is undesirable for PI primitives. Future patches will rectify
this.
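
For reference, the T1/T2 setup above corresponds to a userspace skeleton
along these lines (hypothetical reproducer sketch, not part of this
patch; needs root/SCHED_FIFO, the futex lock_pi/unlock_pi calls are
elided):

	#define _GNU_SOURCE
	#include <pthread.h>
	#include <sched.h>

	/* Pin a thread attribute to CPU0 with the given FIFO priority. */
	static void cpu0_fifo_attr(pthread_attr_t *attr, int prio)
	{
		struct sched_param sp = { .sched_priority = prio };
		cpu_set_t set;

		CPU_ZERO(&set);
		CPU_SET(0, &set);
		pthread_attr_init(attr);
		pthread_attr_setaffinity_np(attr, sizeof(set), &set);
		pthread_attr_setschedpolicy(attr, SCHED_FIFO);
		pthread_attr_setschedparam(attr, &sp);
		pthread_attr_setinheritsched(attr, PTHREAD_EXPLICIT_SCHED);
	}

	/* T1: cpu0_fifo_attr(&a1, 1);  T2: cpu0_fifo_attr(&a2, 2); */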

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: juri.lelli@arm.com
Cc: bigeasy@linutronix.de
Cc: xlpang@redhat.com
Cc: rostedt@goodmis.org
Cc: mathieu.desnoyers@efficios.com
Cc: jdesfossez@efficios.com
Cc: dvhart@infradead.org
Cc: bristot@redhat.com
Link: http://lkml.kernel.org/r/20170322104151.850383690@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/futex.c | 50 ++++++++++++++------------------------------------
 1 file changed, 14 insertions(+), 36 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 629db2cd530e..10468883d7f0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1420,12 +1420,19 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *top_waiter
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
-	 * It is possible that the next waiter (the one that brought
-	 * top_waiter owner to the kernel) timed out and is no longer
-	 * waiting on the lock.
+	 * When we interleave with futex_lock_pi() where it does
+	 * rt_mutex_timed_futex_lock(), we might observe @this futex_q waiter,
+	 * but the rt_mutex's wait_list can be empty (either still, or again,
+	 * depending on which side we land).
+	 *
+	 * When this happens, give up our locks and try again, giving the
+	 * futex_lock_pi() instance time to complete, either by waiting on the
+	 * rtmutex or removing itself from the futex queue.
 	 */
-	if (!new_owner)
-		new_owner = top_waiter->task;
+	if (!new_owner) {
+		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+		return -EAGAIN;
+	}
 
 	/*
 	 * We pass it to the next owner. The WAITERS bit is always
@@ -2398,7 +2405,6 @@ static long futex_wait_restart(struct restart_block *restart);
  */
 static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 {
-	struct task_struct *owner;
 	int ret = 0;
 
 	if (locked) {
@@ -2411,44 +2417,16 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 		goto out;
 	}
 
-	/*
-	 * Catch the rare case, where the lock was released when we were on the
-	 * way back before we locked the hash bucket.
-	 */
-	if (q->pi_state->owner == current) {
-		/*
-		 * Try to get the rt_mutex now. This might fail as some other
-		 * task acquired the rt_mutex after we removed ourself from the
-		 * rt_mutex waiters list.
-		 */
-		if (rt_mutex_futex_trylock(&q->pi_state->pi_mutex)) {
-			locked = 1;
-			goto out;
-		}
-
-		/*
-		 * pi_state is incorrect, some other task did a lock steal and
-		 * we returned due to timeout or signal without taking the
-		 * rt_mutex. Too late.
-		 */
-		raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
-		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
-		if (!owner)
-			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
-		raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
-		ret = fixup_pi_state_owner(uaddr, q, owner);
-		goto out;
-	}
-
 	/*
 	 * Paranoia check. If we did not take the lock, then we should not be
 	 * the owner of the rt_mutex.
 	 */
-	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
+	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
 		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
 				"pi-state %p\n", ret,
 				q->pi_state->pi_mutex.owner,
 				q->pi_state->owner);
+	}
 
 out:
 	return ret ? ret : locked;
-- 
2.28.0