/*
 * Salsa20 routines for x86-64, written for GNU as in AT&T syntax:
 * salsa20_encrypt_bytes, salsa20_keysetup and salsa20_ivsetup.
 */
#include <linux/linkage.h>
|
||
|
|
||
|
/*
 * Helper macros for salsa20_encrypt_bytes.
 *
 * Each macro expands to the same instructions, in the same order, that the
 * routine previously spelled out by hand, so the assembled code is meant to
 * be unchanged.
 *
 * Register roles while the rounds run (x0..x15 are the sixteen 32-bit state
 * words, kept in the low halves of 64-bit registers):
 *
 *   x0 %rdx   x1 %rdi   x2  %rcx        x3  %rsi
 *   x4 %r9    x5 %r15*  x6  %rax        x7  %r8
 *   x8 %r11   x9 %r10   x10 %rbp*       x11 %r12
 *   x12 %r14  x13 %r13  x14 %rbx        x15 %r15*
 *
 *   (*) x5, x10 and x15 live in stack slots 160, 168 and 176 and are loaded
 *       only while needed, because %rbp and %r15 double as scratch.
 */

/* dst ^= rotl32(srca + srcb, bits); tmp/tmpd are the 64/32-bit scratch names */
.macro SALSA20_QSTEP srca, srcb, bits, tmp, tmpd, dst
	lea	(\srca,\srcb),\tmp
	rol	$\bits,\tmpd
	xor	\tmp,\dst
.endm

/* One column round followed by one row round (two of the twenty rounds). */
.macro SALSA20_DOUBLEROUND
	/* column round: chains on (x0,x4,x8,x12) and (x5,x9,x13,x1) interleaved */
	movq	160(%rsp),%r15					# x5 = x5_stack
	SALSA20_QSTEP	%r14, %rdx, 7,  %rbp, %ebp, %r9		# x4  ^= (x12 + x0) <<< 7
	SALSA20_QSTEP	%rdi, %r15, 7,  %rbp, %ebp, %r10	# x9  ^= (x1 + x5) <<< 7
	SALSA20_QSTEP	%rdx, %r9,  9,  %rbp, %ebp, %r11	# x8  ^= (x0 + x4) <<< 9
	SALSA20_QSTEP	%r15, %r10, 9,  %rbp, %ebp, %r13	# x13 ^= (x5 + x9) <<< 9
	SALSA20_QSTEP	%r9,  %r11, 13, %rbp, %ebp, %r14	# x12 ^= (x4 + x8) <<< 13
	SALSA20_QSTEP	%r10, %r13, 13, %rbp, %ebp, %rdi	# x1  ^= (x9 + x13) <<< 13
	SALSA20_QSTEP	%r11, %r14, 18, %rbp, %ebp, %rdx	# x0  ^= (x8 + x12) <<< 18
	SALSA20_QSTEP	%r13, %rdi, 18, %rbp, %ebp, %r15	# x5  ^= (x13 + x1) <<< 18
	movq	168(%rsp),%rbp					# x10 = x10_stack
	movq	%r15,160(%rsp)					# x5_stack = x5
	/* column (x10,x14,x2,x6) */
	SALSA20_QSTEP	%rax, %rbp, 7,  %r15, %r15d, %rbx	# x14 ^= (x6 + x10) <<< 7
	SALSA20_QSTEP	%rbp, %rbx, 9,  %r15, %r15d, %rcx	# x2  ^= (x10 + x14) <<< 9
	SALSA20_QSTEP	%rbx, %rcx, 13, %r15, %r15d, %rax	# x6  ^= (x14 + x2) <<< 13
	SALSA20_QSTEP	%rcx, %rax, 18, %r15, %r15d, %rbp	# x10 ^= (x2 + x6) <<< 18
	movq	176(%rsp),%r15					# x15 = x15_stack
	movq	%rbp,168(%rsp)					# x10_stack = x10
	/* column (x15,x3,x7,x11) */
	SALSA20_QSTEP	%r12, %r15, 7,  %rbp, %ebp, %rsi	# x3  ^= (x11 + x15) <<< 7
	SALSA20_QSTEP	%r15, %rsi, 9,  %rbp, %ebp, %r8		# x7  ^= (x15 + x3) <<< 9
	SALSA20_QSTEP	%rsi, %r8,  13, %rbp, %ebp, %r12	# x11 ^= (x3 + x7) <<< 13
	SALSA20_QSTEP	%r8,  %r12, 18, %rbp, %ebp, %r15	# x15 ^= (x7 + x11) <<< 18
	movq	%r15,176(%rsp)					# x15_stack = x15

	/* row round: chains on (x0,x1,x2,x3) and (x5,x6,x7,x4) interleaved */
	movq	160(%rsp),%r15					# x5 = x5_stack
	SALSA20_QSTEP	%rsi, %rdx, 7,  %rbp, %ebp, %rdi	# x1  ^= (x3 + x0) <<< 7
	SALSA20_QSTEP	%r9,  %r15, 7,  %rbp, %ebp, %rax	# x6  ^= (x4 + x5) <<< 7
	SALSA20_QSTEP	%rdx, %rdi, 9,  %rbp, %ebp, %rcx	# x2  ^= (x0 + x1) <<< 9
	SALSA20_QSTEP	%r15, %rax, 9,  %rbp, %ebp, %r8		# x7  ^= (x5 + x6) <<< 9
	SALSA20_QSTEP	%rdi, %rcx, 13, %rbp, %ebp, %rsi	# x3  ^= (x1 + x2) <<< 13
	SALSA20_QSTEP	%rax, %r8,  13, %rbp, %ebp, %r9		# x4  ^= (x6 + x7) <<< 13
	SALSA20_QSTEP	%rcx, %rsi, 18, %rbp, %ebp, %rdx	# x0  ^= (x2 + x3) <<< 18
	SALSA20_QSTEP	%r8,  %r9,  18, %rbp, %ebp, %r15	# x5  ^= (x7 + x4) <<< 18
	movq	168(%rsp),%rbp					# x10 = x10_stack
	movq	%r15,160(%rsp)					# x5_stack = x5
	/* row (x10,x11,x8,x9) */
	SALSA20_QSTEP	%r10, %rbp, 7,  %r15, %r15d, %r12	# x11 ^= (x9 + x10) <<< 7
	SALSA20_QSTEP	%rbp, %r12, 9,  %r15, %r15d, %r11	# x8  ^= (x10 + x11) <<< 9
	SALSA20_QSTEP	%r12, %r11, 13, %r15, %r15d, %r10	# x9  ^= (x11 + x8) <<< 13
	SALSA20_QSTEP	%r11, %r10, 18, %r15, %r15d, %rbp	# x10 ^= (x8 + x9) <<< 18
	movq	176(%rsp),%r15					# x15 = x15_stack
	movq	%rbp,168(%rsp)					# x10_stack = x10
	/* row (x15,x12,x13,x14) */
	SALSA20_QSTEP	%rbx, %r15, 7,  %rbp, %ebp, %r14	# x12 ^= (x14 + x15) <<< 7
	SALSA20_QSTEP	%r15, %r14, 9,  %rbp, %ebp, %r13	# x13 ^= (x15 + x12) <<< 9
	SALSA20_QSTEP	%r14, %r13, 13, %rbp, %ebp, %rbx	# x14 ^= (x12 + x13) <<< 13
	SALSA20_QSTEP	%r13, %rbx, 18, %rbp, %ebp, %r15	# x15 ^= (x13 + x14) <<< 18
	movq	%r15,176(%rsp)					# x15_stack = x15
.endm

/*
 * Feed-forward for one pair of state words.  joff is the stack offset of the
 * saved input pair j; low32/low name the register holding the even word and
 * high the register holding the odd word.  On exit low holds both sums packed
 * as odd:even in one 64-bit value.
 */
.macro SALSA20_FEEDFWD joff, low32, low, high
	addl	\joff(%rsp),\low32	# even word += j (32-bit, clears bits 63..32)
	shl	$32,\high		# move odd word into the upper half
	addq	\joff(%rsp),\high	# upper half += odd word of j
	shr	$32,\high		# drop the low half that the addq filled in
	shl	$32,\high
	add	\high,\low		# pack odd:even
.endm

/* out[off..off+7] = m[off..off+7] ^ reg, with m in %rsi and out in %rdi */
.macro SALSA20_XOROUT off, reg
	xorq	\off(%rsi),\reg
	movq	\reg,\off(%rdi)
.endm

/*
 * salsa20_encrypt_bytes(x, m, out, bytes)
 *
 * In:   %rdi = x      state, read as eight 64-bit words at x+0 .. x+56
 *       %rsi = m      input bytes
 *       %rdx = out    output bytes
 *       %rcx = bytes  byte count (the full 64-bit register is tested)
 *
 * Works in 64-byte blocks.  For each block the sixteen state words go through
 * twenty rounds (the counter starts at 20 and drops by 4 per loop pass, each
 * pass running SALSA20_DOUBLEROUND twice), the saved input words are added
 * back, and the result is XORed with 64 bytes of m and stored to out.  The
 * 64-bit word at x+32 is incremented once per block in the stack copy and
 * written back to x+32 after the last block.
 *
 * A final block shorter than 64 bytes is handled through the 64-byte tmp
 * buffer: the input is copied in, a full block is processed in place, and
 * only `bytes` bytes are copied to the real destination.
 *
 * Returns immediately, touching nothing, when bytes == 0.
 *
 * Out:  %rax = final %rdi, %rdx = final %rsi.  After data was processed these
 *       are x and the updated word from x+32; on the bytes == 0 exit they are
 *       out and m.  NOTE(review): callers presumably ignore both - confirm.
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, flags.
 *       %rbx, %rbp, %r12-%r15 are saved in the frame and restored.
 *
 * Stack frame (size (%rsp & 31) + 256, so the new %rsp is 32-byte aligned):
 *     0  frame size (%r11)      8..48  saved %r12 %r13 %r14 %r15 %rbx %rbp
 *    56..112  j0 j2 j4 j6 j8 j10 j12 j14 (input state, two words per slot)
 *   120  x_backup       128  ctarget        136  out_backup
 *   144  m_backup       152  bytes_backup   160  x5_stack
 *   168  x10_stack      176  x15_stack      184  i_backup
 *   192..255  tmp
 */
ENTRY(salsa20_encrypt_bytes)
	mov	%rsp,%r11
	and	$31,%r11
	add	$256,%r11		# r11 = frame size
	sub	%r11,%rsp

	mov	%rdi,%r8		# x = arg1
	mov	%rsi,%rsi		# m = arg2 (no-op, kept so the object code is unchanged)
	mov	%rdx,%rdi		# out = arg3
	mov	%rcx,%rdx		# bytes = arg4

	cmp	$0,%rdx
	jbe	._done			# nothing saved yet; r11 still holds the frame size

._start:
	movq	%r11,0(%rsp)		# r11 is reused below, keep the frame size
	movq	%r12,8(%rsp)
	movq	%r13,16(%rsp)
	movq	%r14,24(%rsp)
	movq	%r15,32(%rsp)
	movq	%rbx,40(%rsp)
	movq	%rbp,48(%rsp)

	/* copy the input state into the frame as j0 .. j14 */
	movq	0(%r8),%rcx
	movq	8(%r8),%r9
	movq	16(%r8),%rax
	movq	24(%r8),%r10
	movq	32(%r8),%r11
	movq	40(%r8),%r12
	movq	48(%r8),%r13
	movq	56(%r8),%r14
	movq	%rcx,56(%rsp)		# j0
	movq	%r9,64(%rsp)		# j2
	movq	%rax,72(%rsp)		# j4
	movq	%r10,80(%rsp)		# j6
	movq	%r11,88(%rsp)		# j8
	movq	%r12,96(%rsp)		# j10
	movq	%r13,104(%rsp)		# j12
	movq	%r14,112(%rsp)		# j14
	movq	%r8,120(%rsp)		# x_backup = x

._bytesatleast1:
	cmp	$64,%rdx
	jae	._nocopy

	/* short block: stage the input in tmp and work there */
	movq	%rdi,128(%rsp)		# ctarget = out
	leaq	192(%rsp),%rdi		# out = &tmp
	mov	%rdx,%rcx
	rep movsb			# tmp[0..bytes) = m[0..bytes)
	leaq	192(%rsp),%rdi		# out = &tmp
	leaq	192(%rsp),%rsi		# m = &tmp

._nocopy:
	movq	%rdi,136(%rsp)		# out_backup
	movq	%rsi,144(%rsp)		# m_backup
	movq	%rdx,152(%rsp)		# bytes_backup

	/* split each saved 64-bit slot into its even and odd 32-bit word */
	movq	56(%rsp),%rdi
	mov	%rdi,%rdx		# x0
	shr	$32,%rdi		# x1
	movq	64(%rsp),%rsi
	mov	%rsi,%rcx		# x2
	shr	$32,%rsi		# x3
	movq	72(%rsp),%r8
	mov	%r8,%r9			# x4
	shr	$32,%r8
	movq	%r8,160(%rsp)		# x5_stack
	movq	80(%rsp),%r8
	mov	%r8,%rax		# x6
	shr	$32,%r8			# x7
	movq	88(%rsp),%r10
	mov	%r10,%r11		# x8
	shr	$32,%r10		# x9
	movq	96(%rsp),%r12
	mov	%r12,%r13
	movq	%r13,168(%rsp)		# x10_stack
	shr	$32,%r12		# x11
	movq	104(%rsp),%r13
	mov	%r13,%r14		# x12
	shr	$32,%r13		# x13
	movq	112(%rsp),%r15
	mov	%r15,%rbx		# x14
	shr	$32,%r15
	movq	%r15,176(%rsp)		# x15_stack

	mov	$20,%r15		# i = 20 rounds

._mainloop:
	movq	%r15,184(%rsp)		# i_backup; r15 is needed inside the rounds
	SALSA20_DOUBLEROUND
	SALSA20_DOUBLEROUND
	movq	184(%rsp),%r15
	sub	$4,%r15			# four rounds done per pass
	ja	._mainloop

	/* add the saved input words back, packing two words per register */
	SALSA20_FEEDFWD	64,  %ecx,  %rcx, %rsi		# x2, x3
	SALSA20_FEEDFWD	80,  %eax,  %rax, %r8		# x6, x7
	SALSA20_FEEDFWD	88,  %r11d, %r11, %r10		# x8, x9
	SALSA20_FEEDFWD	104, %r14d, %r14, %r13		# x12, x13
	SALSA20_FEEDFWD	56,  %edx,  %rdx, %rdi		# x0, x1
	movq	160(%rsp),%rdi				# x5 = x5_stack
	SALSA20_FEEDFWD	72,  %r9d,  %r9,  %rdi		# x4, x5
	movq	168(%rsp),%r8				# x10 = x10_stack
	SALSA20_FEEDFWD	96,  %r8d,  %r8,  %r12		# x10, x11
	movq	176(%rsp),%rdi				# x15 = x15_stack
	SALSA20_FEEDFWD	112, %ebx,  %rbx, %rdi		# x14, x15

	movq	136(%rsp),%rdi		# out = out_backup
	movq	144(%rsp),%rsi		# m = m_backup

	SALSA20_XOROUT	0,  %rdx
	SALSA20_XOROUT	8,  %rcx
	SALSA20_XOROUT	16, %r9
	SALSA20_XOROUT	24, %rax
	SALSA20_XOROUT	32, %r11
	SALSA20_XOROUT	40, %r8
	SALSA20_XOROUT	48, %r14
	SALSA20_XOROUT	56, %rbx

	movq	152(%rsp),%rdx		# bytes = bytes_backup

	/* advance the 64-bit word kept in j8 for the next block */
	movq	88(%rsp),%rcx
	add	$1,%rcx
	movq	%rcx,88(%rsp)

	cmp	$64,%rdx
	ja	._bytesatleast65	# more blocks follow
	jae	._bytesatleast64	# exactly one full block was left

	/* short block: copy the valid prefix of tmp to the real destination */
	mov	%rdi,%rsi		# m = out (tmp)
	movq	128(%rsp),%rdi		# out = ctarget
	mov	%rdx,%rcx
	rep movsb

._bytesatleast64:
	movq	120(%rsp),%rdi		# x = x_backup
	movq	88(%rsp),%rsi
	movq	%rsi,32(%rdi)		# write the advanced word back to x+32

	movq	0(%rsp),%r11		# frame size
	movq	8(%rsp),%r12
	movq	16(%rsp),%r13
	movq	24(%rsp),%r14
	movq	32(%rsp),%r15
	movq	40(%rsp),%rbx
	movq	48(%rsp),%rbp

._done:
	add	%r11,%rsp		# release the frame
	mov	%rdi,%rax
	mov	%rsi,%rdx
	ret

._bytesatleast65:
	sub	$64,%rdx		# bytes -= 64
	add	$64,%rdi		# out += 64
	add	$64,%rsi		# m += 64
	jmp	._bytesatleast1
ENDPROC(salsa20_encrypt_bytes)
|
||
|
|
||
|
/*
 * salsa20_keysetup(x, k, kbits)
 *
 * Writes the key words and the four constant words of the 64-byte state.
 *
 * In:   %rdi = x      state; bytes 0..23 and 40..63 are written
 *       %rsi = k      key; 16 bytes are read when kbits < 256, otherwise 32
 *       %rdx = kbits  key size in bits
 * Out:  %rax = x
 * Clobbers: %rdx, %rsi, %r8, %r9, flags
 *
 * Resulting layout, as 32-bit words of x:
 *   words 1..4    k[0..15]
 *   words 11..14  k[16..31] for a 256-bit key, k[0..15] again otherwise
 *   words 0, 5, 10, 15  constants; read as little-endian ASCII they spell
 *                 "expand 32-byte k" (kbits >= 256) or "expand 16-byte k"
 *
 * No stack frame is set up: nothing is spilled, so %rsp is never moved.
 * Loads and stores are issued in the order k, x+4, x+12, k, x+44, x+52,
 * x+0, x+20, x+40, x+60.
 *
 * NOTE(review): the compare uses all 64 bits of %rdx.  When the C prototype
 * declares kbits as a 32-bit type the upper half is not guaranteed to be
 * zero by the calling convention - confirm against the caller.
 */
ENTRY(salsa20_keysetup)
	movq	0(%rsi),%r8
	movq	8(%rsi),%r9
	movq	%r8,4(%rdi)		# words 1..2 = k[0..7]
	movq	%r9,12(%rdi)		# words 3..4 = k[8..15]

	cmp	$256,%rdx
	jb	._kbits128

._kbits256:
	movq	16(%rsi),%rdx
	movq	24(%rsi),%rsi
	movq	%rdx,44(%rdi)		# words 11..12 = k[16..23]
	movq	%rsi,52(%rdi)		# words 13..14 = k[24..31]
	movl	$1634760805,0(%rdi)	# "expa"
	movl	$857760878,20(%rdi)	# "nd 3"
	movl	$2036477234,40(%rdi)	# "2-by"
	movl	$1797285236,60(%rdi)	# "te k"
	jmp	._keysetupdone

._kbits128:
	movq	0(%rsi),%rdx
	movq	8(%rsi),%rsi
	movq	%rdx,44(%rdi)		# words 11..12 = k[0..7]
	movq	%rsi,52(%rdi)		# words 13..14 = k[8..15]
	movl	$1634760805,0(%rdi)	# "expa"
	movl	$824206446,20(%rdi)	# "nd 1"
	movl	$2036477238,40(%rdi)	# "6-by"
	movl	$1797285236,60(%rdi)	# "te k"

._keysetupdone:
	mov	%rdi,%rax
	ret
ENDPROC(salsa20_keysetup)
|
||
|
|
||
|
/*
 * salsa20_ivsetup(x, iv)
 *
 * In:   %rdi = x   state; bytes 24..39 are written
 *       %rsi = iv  8 bytes are read
 * Out:  %rax = x
 * Clobbers: %rdx
 *
 * Stores the 8-byte iv at x+24 and zeroes the 64-bit word at x+32, which is
 * the word salsa20_encrypt_bytes increments once per 64-byte block.
 *
 * No stack frame is set up: nothing is spilled, so %rsp is never moved.
 */
ENTRY(salsa20_ivsetup)
	movq	0(%rsi),%rdx		# in6 = iv[0..7]
	movq	%rdx,24(%rdi)		# words 6..7 = iv
	movq	$0,32(%rdi)		# words 8..9 = 0
	mov	%rdi,%rax
	ret
ENDPROC(salsa20_ivsetup)
|