From 8c3550aa65e8cbd0873975a4ec542361b940fb94 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 30 Nov 2017 13:40:10 +0100
Subject: [PATCH 351/365] crypto: limit more FPU-enabled sections
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Those crypto drivers use SSE/AVX/… for their crypto work and in order to
do so in the kernel they need to enable the "FPU" in kernel mode which
disables preemption.
There are two problems with the way they are used:
- the while loop which processes X bytes may create latency spikes and
  should be avoided or limited.
- the cipher-walk-next part may allocate/free memory and may use
  kmap_atomic().

The whole kernel_fpu_begin()/end() processing probably isn't that cheap.
It most likely makes sense to process as much data as possible in one
go. The new *_fpu_sched_rt() schedules only if an RT task is pending.
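
In this version of the patch the rescheduling helper is
kernel_fpu_resched(); as added to arch/x86/kernel/fpu/core.c at the end
of this patch, it boils down to:

	if (should_resched(PREEMPT_OFFSET)) {
		kernel_fpu_end();
		cond_resched();
		kernel_fpu_begin();
	}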

We should probably measure the performance of those ciphers in pure SW
mode and with these optimisations to see whether it makes sense to keep
them for RT.

This kernel_fpu_resched() makes the code more preemptible, which might
hurt performance.
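
As an illustration, the per-loop pattern applied in the glue code below
looks roughly like this (a simplified sketch; "cipher" and
PARALLEL_BLOCKS stand in for the per-algorithm helpers and block counts,
e.g. camellia_fpu_begin()/camellia_fpu_end_rt()):

	ctx->fpu_enabled = cipher_fpu_begin(ctx->fpu_enabled, nbytes);
	while (nbytes >= PARALLEL_BLOCKS * bsize) {
		/* briefly leave the FPU section if a reschedule is due */
		kernel_fpu_resched();
		cipher_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
		srcdst += bsize * PARALLEL_BLOCKS;
		nbytes -= bsize * PARALLEL_BLOCKS;
	}
	/* on RT, end the FPU section before processing the scalar tail */
	cipher_fpu_end_rt(ctx);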

Cc: stable-rt@vger.kernel.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
(cherry picked from commit 0dcc4c1693ef37e166da420ef7c68c7047c996f1)
Signed-off-by: Julia Cartwright <julia@ni.com>
---
 arch/x86/crypto/camellia_aesni_avx2_glue.c | 20 ++++++++++++++++
 arch/x86/crypto/camellia_aesni_avx_glue.c  | 19 +++++++++++++++
 arch/x86/crypto/cast6_avx_glue.c           | 24 +++++++++++++++----
 arch/x86/crypto/chacha20_glue.c            |  9 ++++----
 arch/x86/crypto/serpent_avx2_glue.c        | 19 +++++++++++++++
 arch/x86/crypto/serpent_avx_glue.c         | 23 ++++++++++++++----
 arch/x86/crypto/serpent_sse2_glue.c        | 23 ++++++++++++++----
 arch/x86/crypto/twofish_avx_glue.c         | 27 ++++++++++++++++++++--
 arch/x86/include/asm/fpu/api.h             |  1 +
 arch/x86/kernel/fpu/core.c                 | 12 ++++++++++
 10 files changed, 158 insertions(+), 19 deletions(-)

diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index 60907c139c4e..0902db7d326a 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -206,6 +206,20 @@ struct crypt_priv {
 	bool fpu_enabled;
 };
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void camellia_fpu_end_rt(struct crypt_priv *ctx)
+{
+	bool fpu_enabled = ctx->fpu_enabled;
+
+	if (!fpu_enabled)
+		return;
+	camellia_fpu_end(fpu_enabled);
+	ctx->fpu_enabled = false;
+}
+#else
+static void camellia_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 {
 	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
@@ -221,16 +235,19 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 	}
 
 	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
+		kernel_fpu_resched();
 		camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
 		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
 		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
 	}
 
 	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
+		kernel_fpu_resched();
 		camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
 		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
 		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
 	}
+	camellia_fpu_end_rt(ctx);
 
 	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
 		camellia_enc_blk(ctx->ctx, srcdst, srcdst);
@@ -251,16 +268,19 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 	}
 
 	if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
+		kernel_fpu_resched();
 		camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
 		srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
 		nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
 	}
 
 	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
+		kernel_fpu_resched();
 		camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
 		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
 		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
 	}
+	camellia_fpu_end_rt(ctx);
 
 	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
 		camellia_dec_blk(ctx->ctx, srcdst, srcdst);
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index d96429da88eb..3b8e91841039 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -210,6 +210,21 @@ struct crypt_priv {
 	bool fpu_enabled;
 };
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void camellia_fpu_end_rt(struct crypt_priv *ctx)
+{
+	bool fpu_enabled = ctx->fpu_enabled;
+
+	if (!fpu_enabled)
+		return;
+	camellia_fpu_end(fpu_enabled);
+	ctx->fpu_enabled = false;
+}
+
+#else
+static void camellia_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 {
 	const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
@@ -225,10 +240,12 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 	}
 
 	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
+		kernel_fpu_resched();
 		camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
 		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
 		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
 	}
+	camellia_fpu_end_rt(ctx);
 
 	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
 		camellia_enc_blk(ctx->ctx, srcdst, srcdst);
@@ -249,10 +266,12 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 	}
 
 	while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
+		kernel_fpu_resched();
 		camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
 		srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
 		nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
 	}
+	camellia_fpu_end_rt(ctx);
 
 	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
 		camellia_dec_blk(ctx->ctx, srcdst, srcdst);
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 50e684768c55..8caf9ba8c1da 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -205,19 +205,33 @@ struct crypt_priv {
 	bool fpu_enabled;
 };
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void cast6_fpu_end_rt(struct crypt_priv *ctx)
+{
+	bool fpu_enabled = ctx->fpu_enabled;
+
+	if (!fpu_enabled)
+		return;
+	cast6_fpu_end(fpu_enabled);
+	ctx->fpu_enabled = false;
+}
+
+#else
+static void cast6_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 {
 	const unsigned int bsize = CAST6_BLOCK_SIZE;
 	struct crypt_priv *ctx = priv;
 	int i;
 
-	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
-
 	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
+		ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
 		cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
+		cast6_fpu_end_rt(ctx);
 		return;
 	}
-
 	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
 		__cast6_encrypt(ctx->ctx, srcdst, srcdst);
 }
@@ -228,10 +242,10 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 	struct crypt_priv *ctx = priv;
 	int i;
 
-	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
-
 	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
+		ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
 		cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
+		cast6_fpu_end_rt(ctx);
 		return;
 	}
 
diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c
index 0a5fedf43bdc..15dee153710b 100644
--- a/arch/x86/crypto/chacha20_glue.c
+++ b/arch/x86/crypto/chacha20_glue.c
@@ -81,23 +81,24 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst,
 
 	crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv);
 
-	kernel_fpu_begin();
-
 	while (walk.nbytes >= CHACHA20_BLOCK_SIZE) {
+		kernel_fpu_begin();
+
 		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
 				rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE));
+		kernel_fpu_end();
 		err = blkcipher_walk_done(desc, &walk,
 					  walk.nbytes % CHACHA20_BLOCK_SIZE);
 	}
 
 	if (walk.nbytes) {
+		kernel_fpu_begin();
 		chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr,
 				walk.nbytes);
+		kernel_fpu_end();
 		err = blkcipher_walk_done(desc, &walk, 0);
 	}
 
-	kernel_fpu_end();
-
 	return err;
 }
 
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index 870f6d812a2d..5c806bf39f1d 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -184,6 +184,21 @@ struct crypt_priv {
 	bool fpu_enabled;
 };
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void serpent_fpu_end_rt(struct crypt_priv *ctx)
+{
+	bool fpu_enabled = ctx->fpu_enabled;
+
+	if (!fpu_enabled)
+		return;
+	serpent_fpu_end(fpu_enabled);
+	ctx->fpu_enabled = false;
+}
+
+#else
+static void serpent_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 {
 	const unsigned int bsize = SERPENT_BLOCK_SIZE;
@@ -199,10 +214,12 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 	}
 
 	while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
+		kernel_fpu_resched();
 		serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
 		srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
 		nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
 	}
+	serpent_fpu_end_rt(ctx);
 
 	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
 		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
@@ -223,10 +240,12 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 	}
 
 	while (nbytes >= SERPENT_PARALLEL_BLOCKS * bsize) {
+		kernel_fpu_resched();
 		serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
 		srcdst += bsize * SERPENT_PARALLEL_BLOCKS;
 		nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
 	}
+	serpent_fpu_end_rt(ctx);
 
 	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
 		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 6f778d3daa22..46dcbdbd0518 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -218,16 +218,31 @@ struct crypt_priv {
 	bool fpu_enabled;
 };
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void serpent_fpu_end_rt(struct crypt_priv *ctx)
+{
+	bool fpu_enabled = ctx->fpu_enabled;
+
+	if (!fpu_enabled)
+		return;
+	serpent_fpu_end(fpu_enabled);
+	ctx->fpu_enabled = false;
+}
+
+#else
+static void serpent_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 {
 	const unsigned int bsize = SERPENT_BLOCK_SIZE;
 	struct crypt_priv *ctx = priv;
 	int i;
 
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
 	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
+		ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
 		serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
+		serpent_fpu_end_rt(ctx);
 		return;
 	}
 
@@ -241,10 +256,10 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 	struct crypt_priv *ctx = priv;
 	int i;
 
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
 	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
+		ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
 		serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
+		serpent_fpu_end_rt(ctx);
 		return;
 	}
 
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 644f97ab8cac..7b135b4f6262 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -187,16 +187,31 @@ struct crypt_priv {
 	bool fpu_enabled;
 };
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void serpent_fpu_end_rt(struct crypt_priv *ctx)
+{
+	bool fpu_enabled = ctx->fpu_enabled;
+
+	if (!fpu_enabled)
+		return;
+	serpent_fpu_end(fpu_enabled);
+	ctx->fpu_enabled = false;
+}
+
+#else
+static void serpent_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 {
 	const unsigned int bsize = SERPENT_BLOCK_SIZE;
 	struct crypt_priv *ctx = priv;
 	int i;
 
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
 	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
+		ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
 		serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+		serpent_fpu_end_rt(ctx);
 		return;
 	}
 
@@ -210,10 +225,10 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 	struct crypt_priv *ctx = priv;
 	int i;
 
-	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
-
 	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
+		ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
 		serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+		serpent_fpu_end_rt(ctx);
 		return;
 	}
 
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index b7a3904b953c..de00fe24927e 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -218,6 +218,21 @@ struct crypt_priv {
 	bool fpu_enabled;
 };
 
+#ifdef CONFIG_PREEMPT_RT_FULL
+static void twofish_fpu_end_rt(struct crypt_priv *ctx)
+{
+	bool fpu_enabled = ctx->fpu_enabled;
+
+	if (!fpu_enabled)
+		return;
+	twofish_fpu_end(fpu_enabled);
+	ctx->fpu_enabled = false;
+}
+
+#else
+static void twofish_fpu_end_rt(struct crypt_priv *ctx) { }
+#endif
+
 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 {
 	const unsigned int bsize = TF_BLOCK_SIZE;
@@ -228,12 +243,16 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 
 	if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
 		twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
+		twofish_fpu_end_rt(ctx);
 		return;
 	}
 
-	for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
+	for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) {
+		kernel_fpu_resched();
 		twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
+	}
 
+	twofish_fpu_end_rt(ctx);
 	nbytes %= bsize * 3;
 
 	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
@@ -250,11 +269,15 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
 
 	if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
 		twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
+		twofish_fpu_end_rt(ctx);
 		return;
 	}
 
-	for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3)
+	for (i = 0; i < nbytes / (bsize * 3); i++, srcdst += bsize * 3) {
+		kernel_fpu_resched();
 		twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
+	}
+	twofish_fpu_end_rt(ctx);
 
 	nbytes %= bsize * 3;
 
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 1429a7c736db..85428df40a22 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -24,6 +24,7 @@ extern void __kernel_fpu_begin(void);
 extern void __kernel_fpu_end(void);
 extern void kernel_fpu_begin(void);
 extern void kernel_fpu_end(void);
+extern void kernel_fpu_resched(void);
 extern bool irq_fpu_usable(void);
 
 /*
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index fc965118d2e6..d7f42162cb6f 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -138,6 +138,18 @@ void kernel_fpu_end(void)
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
 
+void kernel_fpu_resched(void)
+{
+	WARN_ON_FPU(!this_cpu_read(in_kernel_fpu));
+
+	if (should_resched(PREEMPT_OFFSET)) {
+		kernel_fpu_end();
+		cond_resched();
+		kernel_fpu_begin();
+	}
+}
+EXPORT_SYMBOL_GPL(kernel_fpu_resched);
+
 /*
  * CR0::TS save/restore functions:
  */
--
2.28.0