# salsa20_pm.s version 20051229
# D. J. Bernstein
# Public domain.

#include <linux/linkage.h>
|
||
|
|
||
|
.text
|
||
|
|
||
|
# enter salsa20_encrypt_bytes

/*
 * salsa20_encrypt_bytes: XOR a byte string with the Salsa20 keystream.
 *
 * Syntax: AT&T, 32-bit x86.  Arguments are read from the stack, with the
 * first argument at 4(%esp) on entry:
 *   arg1  x      16-word (64-byte) state, read in full; only words 8 and 9
 *                (byte offsets 32 and 36) are written back
 *   arg2  m      input bytes
 *   arg3  out    output bytes
 *   arg4  bytes  length; 0 returns immediately without touching x, m or out
 *
 * Each 64-byte block is produced by 20 rounds over a working copy of the
 * state (the loop counter starts at 20 and drops by 4 per pass, each pass
 * running two double rounds), adding the input state back in, and XORing
 * with m.  After every block, words 8/9 are incremented as one 64-bit
 * little-endian counter (add + adc).  A final block shorter than 64 bytes
 * is bounced through the 64-byte tmp buffer so that m and out are never
 * accessed beyond "bytes".
 *
 * Registers: %ebx, %esi, %edi, %ebp are saved in the frame and restored.
 * %eax, %ecx, %edx and the flags are clobbered (%eax returns holding the
 * frame size).  rep movsb is used for the short-block copies.
 * NOTE(review): that copy is only correct with DF clear on entry;
 * presumably guaranteed by the calling convention -- confirm.
 * NOTE(review): tmp and the j0..j15 copy of the state are left on the
 * stack on return; confirm whether callers need them wiped.
 *
 * Frame (size = 256 + (entry %esp & 31), so %esp is 32-byte aligned):
 *     0.. 63  tmp           bounce buffer for a short final block
 *    64       x_backup      saved arg1
 *    68       m_backup
 *    72       out_backup
 *    76       bytes_backup
 *    80       eax_stack     frame size, needed to undo the allocation
 *    84.. 96  ebx_stack, esi_stack, edi_stack, ebp_stack
 *   100..160  x0..x15       working state for the rounds
 *   164..224  j0..j15       input state (j8/j9 carry the running counter)
 *   228       ctarget       real destination while out points at tmp
 */

/*
 * One double round over x0..x15 at 100..160(%esp).
 * Live across the macro: p=%eax (x0), s=%ecx (x5), t=%edx (x10),
 * w=%ebx (x15); r=%esi and v=%edi are scratch.  The four quarter-round
 * chains are interleaved; the instruction order is kept exactly as
 * generated.
 */
.macro salsa20_double_round
	/* first half: chains x0,x4,x8,x12 / x5,x9,x13,x1 / x10,x14,x2,x6 / x15,x3,x7,x11 */
	movl	%eax,100(%esp)		# x0 = p
	movl	%edx,140(%esp)		# x10 = t
	addl	148(%esp),%eax		# p += x12
	movl	%ecx,120(%esp)		# x5 = s
	addl	124(%esp),%edx		# t += x6
	movl	%ebx,160(%esp)		# x15 = w
	movl	104(%esp),%esi		# r = x1
	add	%ecx,%esi		# r += s
	movl	144(%esp),%edi		# v = x11
	add	%ebx,%edi		# v += w
	rol	$7,%eax			# p <<<= 7
	xorl	116(%esp),%eax		# p ^= x4
	rol	$7,%edx			# t <<<= 7
	xorl	156(%esp),%edx		# t ^= x14
	rol	$7,%esi			# r <<<= 7
	xorl	136(%esp),%esi		# r ^= x9
	rol	$7,%edi			# v <<<= 7
	xorl	112(%esp),%edi		# v ^= x3
	movl	%eax,116(%esp)		# x4 = p
	movl	%edx,156(%esp)		# x14 = t
	addl	100(%esp),%eax		# p += x0
	movl	%esi,136(%esp)		# x9 = r
	addl	140(%esp),%edx		# t += x10
	movl	%edi,112(%esp)		# x3 = v
	rol	$9,%eax			# p <<<= 9
	xorl	132(%esp),%eax		# p ^= x8
	rol	$9,%edx			# t <<<= 9
	xorl	108(%esp),%edx		# t ^= x2
	add	%esi,%ecx		# s += r
	rol	$9,%ecx			# s <<<= 9
	xorl	152(%esp),%ecx		# s ^= x13
	add	%edi,%ebx		# w += v
	rol	$9,%ebx			# w <<<= 9
	xorl	128(%esp),%ebx		# w ^= x7
	movl	%eax,132(%esp)		# x8 = p
	movl	%edx,108(%esp)		# x2 = t
	addl	116(%esp),%eax		# p += x4
	movl	%ecx,152(%esp)		# x13 = s
	addl	156(%esp),%edx		# t += x14
	movl	%ebx,128(%esp)		# x7 = w
	rol	$13,%eax		# p <<<= 13
	xorl	148(%esp),%eax		# p ^= x12
	rol	$13,%edx		# t <<<= 13
	xorl	124(%esp),%edx		# t ^= x6
	add	%ecx,%esi		# r += s
	rol	$13,%esi		# r <<<= 13
	xorl	104(%esp),%esi		# r ^= x1
	add	%ebx,%edi		# v += w
	rol	$13,%edi		# v <<<= 13
	xorl	144(%esp),%edi		# v ^= x11
	movl	%eax,148(%esp)		# x12 = p
	movl	%edx,124(%esp)		# x6 = t
	addl	132(%esp),%eax		# p += x8
	movl	%esi,104(%esp)		# x1 = r
	addl	108(%esp),%edx		# t += x2
	movl	%edi,144(%esp)		# x11 = v
	rol	$18,%eax		# p <<<= 18
	xorl	100(%esp),%eax		# p ^= x0
	rol	$18,%edx		# t <<<= 18
	xorl	140(%esp),%edx		# t ^= x10
	add	%esi,%ecx		# s += r
	rol	$18,%ecx		# s <<<= 18
	xorl	120(%esp),%ecx		# s ^= x5
	add	%edi,%ebx		# w += v
	rol	$18,%ebx		# w <<<= 18
	xorl	160(%esp),%ebx		# w ^= x15

	/* second half: chains x0,x1,x2,x3 / x5,x6,x7,x4 / x10,x11,x8,x9 / x15,x12,x13,x14 */
	movl	%eax,100(%esp)		# x0 = p
	movl	%edx,140(%esp)		# x10 = t
	addl	112(%esp),%eax		# p += x3
	rol	$7,%eax			# p <<<= 7
	movl	%ecx,120(%esp)		# x5 = s
	addl	136(%esp),%edx		# t += x9
	movl	%ebx,160(%esp)		# x15 = w
	movl	116(%esp),%esi		# r = x4
	add	%ecx,%esi		# r += s
	movl	156(%esp),%edi		# v = x14
	add	%ebx,%edi		# v += w
	xorl	104(%esp),%eax		# p ^= x1
	rol	$7,%edx			# t <<<= 7
	xorl	144(%esp),%edx		# t ^= x11
	rol	$7,%esi			# r <<<= 7
	xorl	124(%esp),%esi		# r ^= x6
	rol	$7,%edi			# v <<<= 7
	xorl	148(%esp),%edi		# v ^= x12
	movl	%eax,104(%esp)		# x1 = p
	movl	%edx,144(%esp)		# x11 = t
	addl	100(%esp),%eax		# p += x0
	movl	%esi,124(%esp)		# x6 = r
	addl	140(%esp),%edx		# t += x10
	movl	%edi,148(%esp)		# x12 = v
	rol	$9,%eax			# p <<<= 9
	xorl	108(%esp),%eax		# p ^= x2
	rol	$9,%edx			# t <<<= 9
	xorl	132(%esp),%edx		# t ^= x8
	add	%esi,%ecx		# s += r
	rol	$9,%ecx			# s <<<= 9
	xorl	128(%esp),%ecx		# s ^= x7
	add	%edi,%ebx		# w += v
	rol	$9,%ebx			# w <<<= 9
	xorl	152(%esp),%ebx		# w ^= x13
	movl	%eax,108(%esp)		# x2 = p
	movl	%edx,132(%esp)		# x8 = t
	addl	104(%esp),%eax		# p += x1
	movl	%ecx,128(%esp)		# x7 = s
	addl	144(%esp),%edx		# t += x11
	movl	%ebx,152(%esp)		# x13 = w
	rol	$13,%eax		# p <<<= 13
	xorl	112(%esp),%eax		# p ^= x3
	rol	$13,%edx		# t <<<= 13
	xorl	136(%esp),%edx		# t ^= x9
	add	%ecx,%esi		# r += s
	rol	$13,%esi		# r <<<= 13
	xorl	116(%esp),%esi		# r ^= x4
	add	%ebx,%edi		# v += w
	rol	$13,%edi		# v <<<= 13
	xorl	156(%esp),%edi		# v ^= x14
	movl	%eax,112(%esp)		# x3 = p
	movl	%edx,136(%esp)		# x9 = t
	addl	108(%esp),%eax		# p += x2
	movl	%esi,116(%esp)		# x4 = r
	addl	132(%esp),%edx		# t += x8
	movl	%edi,156(%esp)		# x14 = v
	rol	$18,%eax		# p <<<= 18
	xorl	100(%esp),%eax		# p ^= x0
	rol	$18,%edx		# t <<<= 18
	xorl	140(%esp),%edx		# t ^= x10
	add	%esi,%ecx		# s += r
	rol	$18,%ecx		# s <<<= 18
	xorl	120(%esp),%ecx		# s ^= x5
	add	%edi,%ebx		# w += v
	rol	$18,%ebx		# w <<<= 18
	xorl	160(%esp),%ebx		# w ^= x15
.endm

ENTRY(salsa20_encrypt_bytes)
	/* allocate 256 + (esp & 31) bytes; leaves %esp 32-byte aligned */
	mov	%esp,%eax
	and	$31,%eax
	add	$256,%eax
	sub	%eax,%esp

	movl	%eax,80(%esp)		# eax_stack = eax (frame size)
	movl	%ebx,84(%esp)		# ebx_stack = ebx
	movl	%esi,88(%esp)		# esi_stack = esi
	movl	%edi,92(%esp)		# edi_stack = edi
	movl	%ebp,96(%esp)		# ebp_stack = ebp

	/* %esp + %eax is the entry %esp, so arguments start at 4(%esp,%eax) */
	movl	4(%esp,%eax),%edx	# x = arg1
	movl	8(%esp,%eax),%esi	# m = arg2
	movl	12(%esp,%eax),%edi	# out = arg3
	movl	16(%esp,%eax),%ebx	# bytes = arg4

	/* sub $0 always clears CF, so jbe is taken exactly when bytes == 0 */
	sub	$0,%ebx			# bytes -= 0
	jbe	.Lenc_done		# goto done if unsigned<=

	/* copy the 16 input words to j0..j15, rotating through three registers */
	movl	0(%edx),%eax		# in0 = *(uint32 *) (x + 0)
	movl	4(%edx),%ecx		# in1 = *(uint32 *) (x + 4)
	movl	8(%edx),%ebp		# in2 = *(uint32 *) (x + 8)
	movl	%eax,164(%esp)		# j0 = in0
	movl	12(%edx),%eax		# in3 = *(uint32 *) (x + 12)
	movl	%ecx,168(%esp)		# j1 = in1
	movl	16(%edx),%ecx		# in4 = *(uint32 *) (x + 16)
	movl	%ebp,172(%esp)		# j2 = in2
	movl	20(%edx),%ebp		# in5 = *(uint32 *) (x + 20)
	movl	%eax,176(%esp)		# j3 = in3
	movl	24(%edx),%eax		# in6 = *(uint32 *) (x + 24)
	movl	%ecx,180(%esp)		# j4 = in4
	movl	28(%edx),%ecx		# in7 = *(uint32 *) (x + 28)
	movl	%ebp,184(%esp)		# j5 = in5
	movl	32(%edx),%ebp		# in8 = *(uint32 *) (x + 32)
	movl	%eax,188(%esp)		# j6 = in6
	movl	36(%edx),%eax		# in9 = *(uint32 *) (x + 36)
	movl	%ecx,192(%esp)		# j7 = in7
	movl	40(%edx),%ecx		# in10 = *(uint32 *) (x + 40)
	movl	%ebp,196(%esp)		# j8 = in8
	movl	44(%edx),%ebp		# in11 = *(uint32 *) (x + 44)
	movl	%eax,200(%esp)		# j9 = in9
	movl	48(%edx),%eax		# in12 = *(uint32 *) (x + 48)
	movl	%ecx,204(%esp)		# j10 = in10
	movl	52(%edx),%ecx		# in13 = *(uint32 *) (x + 52)
	movl	%ebp,208(%esp)		# j11 = in11
	movl	56(%edx),%ebp		# in14 = *(uint32 *) (x + 56)
	movl	%eax,212(%esp)		# j12 = in12
	movl	60(%edx),%eax		# in15 = *(uint32 *) (x + 60)
	movl	%ecx,216(%esp)		# j13 = in13
	movl	%ebp,220(%esp)		# j14 = in14
	movl	%eax,224(%esp)		# j15 = in15
	movl	%edx,64(%esp)		# x_backup = x

	/* per-block loop; invariant here: bytes (%ebx) > 0 */
.Lenc_bytesatleast1:
	cmp	$64,%ebx		# bytes - 64
	jae	.Lenc_nocopy		# goto nocopy if unsigned>=

	/* short final block: stage the input in tmp and redirect m and out */
	movl	%edi,228(%esp)		# ctarget = out
	leal	0(%esp),%edi		# out = &tmp
	mov	%ebx,%ecx		# i = bytes
	rep movsb			# while (i) { *out++ = *m++; --i }
	leal	0(%esp),%edi		# out = &tmp
	leal	0(%esp),%esi		# m = &tmp

.Lenc_nocopy:
	/* every register is needed by the rounds, so spill the pointers */
	movl	%edi,72(%esp)		# out_backup = out
	movl	%esi,68(%esp)		# m_backup = m
	movl	%ebx,76(%esp)		# bytes_backup = bytes

	/* x0..x15 = j0..j15, four words at a time (o = byte offset of the group) */
	.irp	o,0,16,32,48
	movl	164+\o(%esp),%eax
	movl	168+\o(%esp),%ecx
	movl	172+\o(%esp),%edx
	movl	176+\o(%esp),%ebx
	movl	%eax,100+\o(%esp)
	movl	%ecx,104+\o(%esp)
	movl	%edx,108+\o(%esp)
	movl	%ebx,112+\o(%esp)
	.endr

	mov	$20,%ebp		# i = 20
	movl	100(%esp),%eax		# p = x0
	movl	120(%esp),%ecx		# s = x5
	movl	140(%esp),%edx		# t = x10
	movl	160(%esp),%ebx		# w = x15

	/* each pass runs two double rounds and counts i down by 4: five passes */
.Lenc_mainloop:
	salsa20_double_round
	salsa20_double_round
	sub	$4,%ebp			# i -= 4
	ja	.Lenc_mainloop		# goto mainloop if unsigned >

	/* flush the four words that were still held in registers */
	movl	%eax,100(%esp)		# x0 = p
	movl	%ecx,120(%esp)		# x5 = s
	movl	%edx,140(%esp)		# x10 = t
	movl	%ebx,160(%esp)		# x15 = w

	movl	72(%esp),%edi		# out = out_backup
	movl	68(%esp),%esi		# m = m_backup

	/*
	 * Output, two words at a time (o = byte offset of the pair):
	 * out[k] = (x[k] + j[k]) ^ m[k]
	 */
	.irp	o,0,8,16,24,32,40,48,56
	movl	100+\o(%esp),%eax
	movl	104+\o(%esp),%ecx
	addl	164+\o(%esp),%eax
	addl	168+\o(%esp),%ecx
	xorl	\o(%esi),%eax
	xorl	4+\o(%esi),%ecx
	movl	%eax,\o(%edi)
	movl	%ecx,4+\o(%edi)
	.endr

	movl	76(%esp),%ebx		# bytes = bytes_backup

	/* advance the 64-bit block counter kept in j8 (low) and j9 (high) */
	movl	196(%esp),%eax		# in8 = j8
	movl	200(%esp),%ecx		# in9 = j9
	add	$1,%eax			# in8 += 1
	adc	$0,%ecx			# in9 += 0 + carry
	movl	%eax,196(%esp)		# j8 = in8
	movl	%ecx,200(%esp)		# j9 = in9

	cmp	$64,%ebx		# bytes - 64
	ja	.Lenc_bytesatleast65	# goto bytesatleast65 if unsigned>
	jae	.Lenc_bytesatleast64	# goto bytesatleast64 if unsigned>=

	/* short block: copy only the requested bytes from tmp to the real output */
	mov	%edi,%esi		# m = out
	movl	228(%esp),%edi		# out = ctarget
	mov	%ebx,%ecx		# i = bytes
	rep movsb			# while (i) { *out++ = *m++; --i }

.Lenc_bytesatleast64:
	/* last block done: publish the updated counter to the caller state */
	movl	64(%esp),%eax		# x = x_backup
	movl	196(%esp),%ecx		# in8 = j8
	movl	200(%esp),%edx		# in9 = j9
	movl	%ecx,32(%eax)		# *(uint32 *) (x + 32) = in8
	movl	%edx,36(%eax)		# *(uint32 *) (x + 36) = in9

.Lenc_done:
	movl	80(%esp),%eax		# eax = eax_stack
	movl	84(%esp),%ebx		# ebx = ebx_stack
	movl	88(%esp),%esi		# esi = esi_stack
	movl	92(%esp),%edi		# edi = edi_stack
	movl	96(%esp),%ebp		# ebp = ebp_stack
	add	%eax,%esp		# leave: release the frame
	ret

.Lenc_bytesatleast65:
	sub	$64,%ebx		# bytes -= 64
	add	$64,%edi		# out += 64
	add	$64,%esi		# m += 64
	jmp	.Lenc_bytesatleast1	# goto bytesatleast1
ENDPROC(salsa20_encrypt_bytes)
|
||
|
|
||
|
# enter salsa20_keysetup

/*
 * salsa20_keysetup: load the key words and the matching constants into
 * the state.
 *
 * Syntax: AT&T, 32-bit x86.  Arguments are read from the stack, with the
 * first argument at 4(%esp) on entry:
 *   arg1  x      state; byte offsets 0..20 and 40..60 are written,
 *                offsets 24..36 are left untouched
 *   arg2  k      key bytes; 32 are read when kbits >= 256, otherwise 16
 *   arg3  kbits  compared unsigned against 256; every smaller value selects
 *                the 128-bit layout (k[0..15] is stored twice)
 *
 * Constants, read as little-endian ASCII:
 *   256-bit: "expa" "nd 3" "2-by" "te k"
 *   128-bit: "expa" "nd 1" "6-by" "te k"
 *
 * Registers: %ebx, %esi, %edi, %ebp are pushed and restored; %eax, %ecx,
 * %edx are clobbered.  Flags are clobbered.
 */
ENTRY(salsa20_keysetup)
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	pushl	%ebp

	/* four pushes plus the return address: arg1 now sits at 20(%esp) */
	movl	20(%esp),%eax		# x = arg1
	movl	24(%esp),%ecx		# k = arg2
	movl	28(%esp),%edx		# kbits = arg3

	/* first 16 key bytes go to words 1..4 for either key size */
	movl	0(%ecx),%ebx		# in1 = *(uint32 *) (k + 0)
	movl	4(%ecx),%esi		# in2 = *(uint32 *) (k + 4)
	movl	8(%ecx),%edi		# in3 = *(uint32 *) (k + 8)
	movl	12(%ecx),%ebp		# in4 = *(uint32 *) (k + 12)
	movl	%ebx,4(%eax)		# *(uint32 *) (x + 4) = in1
	movl	%esi,8(%eax)		# *(uint32 *) (x + 8) = in2
	movl	%edi,12(%eax)		# *(uint32 *) (x + 12) = in3
	movl	%ebp,16(%eax)		# *(uint32 *) (x + 16) = in4

	cmp	$256,%edx		# kbits - 256
	jb	.Lkeysetup_kbits128	# goto kbits128 if unsigned<

	/* 256-bit key: second half of the key feeds words 11..14 */
	movl	16(%ecx),%edx		# in11 = *(uint32 *) (k + 16)
	movl	20(%ecx),%ebx		# in12 = *(uint32 *) (k + 20)
	movl	24(%ecx),%esi		# in13 = *(uint32 *) (k + 24)
	movl	28(%ecx),%ecx		# in14 = *(uint32 *) (k + 28)
	movl	$857760878,%edi		# in5 = 0x3320646e, "nd 3"
	movl	$2036477234,%ebp	# in10 = 0x79622d32, "2-by"
	jmp	.Lkeysetup_store

.Lkeysetup_kbits128:
	/* 128-bit key: the same 16 bytes are re-read and reused for words 11..14 */
	movl	0(%ecx),%edx		# in11 = *(uint32 *) (k + 0)
	movl	4(%ecx),%ebx		# in12 = *(uint32 *) (k + 4)
	movl	8(%ecx),%esi		# in13 = *(uint32 *) (k + 8)
	movl	12(%ecx),%ecx		# in14 = *(uint32 *) (k + 12)
	movl	$824206446,%edi		# in5 = 0x3120646e, "nd 1"
	movl	$2036477238,%ebp	# in10 = 0x79622d36, "6-by"

.Lkeysetup_store:
	/* shared tail: same stores, in the same order, for both key sizes */
	movl	%edx,44(%eax)		# *(uint32 *) (x + 44) = in11
	movl	%ebx,48(%eax)		# *(uint32 *) (x + 48) = in12
	movl	%esi,52(%eax)		# *(uint32 *) (x + 52) = in13
	movl	%ecx,56(%eax)		# *(uint32 *) (x + 56) = in14
	movl	$1634760805,0(%eax)	# *(uint32 *) (x + 0) = 0x61707865, "expa"
	movl	%edi,20(%eax)		# *(uint32 *) (x + 20) = in5
	movl	%ebp,40(%eax)		# *(uint32 *) (x + 40) = in10
	movl	$1797285236,60(%eax)	# *(uint32 *) (x + 60) = 0x6b206574, "te k"

	popl	%ebp
	popl	%edi
	popl	%esi
	popl	%ebx
	ret
ENDPROC(salsa20_keysetup)
|
||
|
|
||
|
# enter salsa20_ivsetup

/*
 * salsa20_ivsetup: store an 8-byte IV in state words 6..7 and reset the
 * block counter in words 8..9 to zero.
 *
 * Syntax: AT&T, 32-bit x86.  Arguments are read from the stack, with the
 * first argument at 4(%esp) on entry:
 *   arg1  x   state; only byte offsets 24..39 are written
 *   arg2  iv  8 bytes, read as two 32-bit words
 *
 * Frameless: only %eax, %ecx and %edx are used, so nothing has to be
 * saved.  Both IV words are loaded before the first store.
 */
ENTRY(salsa20_ivsetup)
	movl	8(%esp),%ecx		# iv = arg2
	movl	4(%esp),%eax		# x = arg1
	movl	0(%ecx),%edx		# in6 = *(uint32 *) (iv + 0)
	movl	4(%ecx),%ecx		# in7 = *(uint32 *) (iv + 4)
	movl	%edx,24(%eax)		# *(uint32 *) (x + 24) = in6
	movl	%ecx,28(%eax)		# *(uint32 *) (x + 28) = in7
	movl	$0,32(%eax)		# *(uint32 *) (x + 32) = 0 (in8)
	movl	$0,36(%eax)		# *(uint32 *) (x + 36) = 0 (in9)
	ret
ENDPROC(salsa20_ivsetup)
|