From b7fd8d8127fadb1d1bc6c85b2174ab93b06730d0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 22 Mar 2017 11:35:54 +0100
Subject: [PATCH 011/365] futex: Rework inconsistent rt_mutex/futex_q state

Upstream commit 73d786bd043ebc855f349c81ea805f6b11cbf2aa

There is a weird state in the futex_unlock_pi() path when it interleaves
with a concurrent futex_lock_pi() at the point where it drops hb->lock.

In this case, it can happen that the rt_mutex wait_list and the futex_q
disagree on pending waiters; in particular, rt_mutex will find no pending
waiters where futex_q thinks there are. When that happens, the rt_mutex
unlock code cannot assign an owner.
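
Schematically, the unlock path then observes (an illustrative sketch,
not part of this patch; rt_mutex_next_owner() appears in the hunk below,
futex_top_waiter() and the local variables are assumed from kernel/futex.c):

	/* futex_q side: the hash bucket still shows a pending waiter... */
	top_waiter = futex_top_waiter(hb, &key);

	/* ...but the rt_mutex side finds its wait_list empty */
	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);

	/* new_owner == NULL: the unlock code has nobody to assign as owner */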

The futex side fixup code has to clean up the inconsistencies with quite
a bunch of interesting corner cases.

Simplify all this by changing wake_futex_pi() to return -EAGAIN when this
situation occurs. This then gives the futex_lock_pi() code the opportunity
to continue and the retried futex_unlock_pi() will now observe a coherent
state.
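
As a sketch of the resulting control flow (hypothetical caller shape,
not the literal futex_unlock_pi() code):

	ret = wake_futex_pi(uaddr, uval, top_waiter, hb);
	if (ret == -EAGAIN) {
		/*
		 * Transient rt_mutex/futex_q disagreement: back off,
		 * let the concurrent futex_lock_pi() either block on
		 * the rt_mutex or dequeue itself, then try again.
		 */
		goto retry;	/* re-read uval and re-take hb->lock */
	}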

The only problem is that this breaks RT timeliness guarantees. That
is, consider the following scenario:

 T1 and T2 are both pinned to CPU0. prio(T2) > prio(T1)

    CPU0

    T1
      lock_pi()
      queue_me()  <- Waiter is visible

    preemption

    T2
      unlock_pi()
        loops with -EAGAIN forever

Which is undesirable for PI primitives. Future patches will rectify
this.
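
For reference, the T1/T2 setup above corresponds to a userspace skeleton
along these lines (hypothetical reproducer sketch, not part of this
patch; needs root/SCHED_FIFO, the futex lock_pi/unlock_pi calls are
elided):

	#define _GNU_SOURCE
	#include <pthread.h>
	#include <sched.h>

	/* Pin a thread attribute to CPU0 with the given FIFO priority. */
	static void cpu0_fifo_attr(pthread_attr_t *attr, int prio)
	{
		struct sched_param sp = { .sched_priority = prio };
		cpu_set_t set;

		CPU_ZERO(&set);
		CPU_SET(0, &set);
		pthread_attr_init(attr);
		pthread_attr_setaffinity_np(attr, sizeof(set), &set);
		pthread_attr_setschedpolicy(attr, SCHED_FIFO);
		pthread_attr_setschedparam(attr, &sp);
		pthread_attr_setinheritsched(attr, PTHREAD_EXPLICIT_SCHED);
	}

	/* T1: cpu0_fifo_attr(&a1, 1);  T2: cpu0_fifo_attr(&a2, 2); */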

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: juri.lelli@arm.com
Cc: bigeasy@linutronix.de
Cc: xlpang@redhat.com
Cc: rostedt@goodmis.org
Cc: mathieu.desnoyers@efficios.com
Cc: jdesfossez@efficios.com
Cc: dvhart@infradead.org
Cc: bristot@redhat.com
Link: http://lkml.kernel.org/r/20170322104151.850383690@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 kernel/futex.c | 50 ++++++++++++++------------------------------------
 1 file changed, 14 insertions(+), 36 deletions(-)

diff --git a/kernel/futex.c b/kernel/futex.c
index 629db2cd530e..10468883d7f0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1420,12 +1420,19 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *top_waiter
 	new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
 
 	/*
-	 * It is possible that the next waiter (the one that brought
-	 * top_waiter owner to the kernel) timed out and is no longer
-	 * waiting on the lock.
+	 * When we interleave with futex_lock_pi() where it does
+	 * rt_mutex_timed_futex_lock(), we might observe @this futex_q waiter,
+	 * but the rt_mutex's wait_list can be empty (either still, or again,
+	 * depending on which side we land).
+	 *
+	 * When this happens, give up our locks and try again, giving the
+	 * futex_lock_pi() instance time to complete, either by waiting on the
+	 * rtmutex or removing itself from the futex queue.
 	 */
-	if (!new_owner)
-		new_owner = top_waiter->task;
+	if (!new_owner) {
+		raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
+		return -EAGAIN;
+	}
 
 	/*
 	 * We pass it to the next owner. The WAITERS bit is always
@@ -2398,7 +2405,6 @@ static long futex_wait_restart(struct restart_block *restart);
  */
 static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 {
-	struct task_struct *owner;
 	int ret = 0;
 
 	if (locked) {
@@ -2411,44 +2417,16 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
 		goto out;
 	}
 
-	/*
-	 * Catch the rare case, where the lock was released when we were on the
-	 * way back before we locked the hash bucket.
-	 */
-	if (q->pi_state->owner == current) {
-		/*
-		 * Try to get the rt_mutex now. This might fail as some other
-		 * task acquired the rt_mutex after we removed ourself from the
-		 * rt_mutex waiters list.
-		 */
-		if (rt_mutex_futex_trylock(&q->pi_state->pi_mutex)) {
-			locked = 1;
-			goto out;
-		}
-
-		/*
-		 * pi_state is incorrect, some other task did a lock steal and
-		 * we returned due to timeout or signal without taking the
-		 * rt_mutex. Too late.
-		 */
-		raw_spin_lock_irq(&q->pi_state->pi_mutex.wait_lock);
-		owner = rt_mutex_owner(&q->pi_state->pi_mutex);
-		if (!owner)
-			owner = rt_mutex_next_owner(&q->pi_state->pi_mutex);
-		raw_spin_unlock_irq(&q->pi_state->pi_mutex.wait_lock);
-		ret = fixup_pi_state_owner(uaddr, q, owner);
-		goto out;
-	}
-
 	/*
 	 * Paranoia check. If we did not take the lock, then we should not be
 	 * the owner of the rt_mutex.
 	 */
-	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
+	if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
 		printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
 				"pi-state %p\n", ret,
 				q->pi_state->pi_mutex.owner,
 				q->pi_state->owner);
+	}
 
 out:
 	return ret ? ret : locked;
-- 
2.28.0