mirror of
https://github.com/veracrypt/VeraCrypt.git
synced 2025-11-12 11:28:26 -06:00
331 lines
5.0 KiB
NASM
331 lines
5.0 KiB
NASM
;
|
|
; Copyright (c) 2010 TrueCrypt Developers Association. All rights reserved.
|
|
;
|
|
; Governed by the TrueCrypt License 3.0 the full text of which is contained in
|
|
; the file License.txt included in TrueCrypt binary and source code distribution
|
|
; packages.
|
|
;
|
|
|
|
|
|
%ifidn __BITS__, 16
|
|
%define R e
|
|
%elifidn __BITS__, 32
|
|
%define R e
|
|
%elifidn __BITS__, 64
|
|
%define R r
|
|
%endif
|
|
|
|
|
|
%macro export_function 1-2 0
|
|
|
|
%ifdef MS_STDCALL
|
|
global %1@%2
|
|
export _%1@%2
|
|
%1@%2:
|
|
%elifidn __BITS__, 16
|
|
global _%1
|
|
_%1:
|
|
%else
|
|
global %1
|
|
%1:
|
|
%endif
|
|
|
|
%endmacro
|
|
|
|
|
|
%macro aes_function_entry 1
|
|
|
|
; void (const byte *ks, byte *data);
|
|
|
|
export_function %1, 8
|
|
|
|
%ifidn __BITS__, 32
|
|
mov ecx, [esp + 4 + 4 * 0]
|
|
mov edx, [esp + 4 + 4 * 1]
|
|
%elifidn __BITS__, 64
|
|
%ifnidn __OUTPUT_FORMAT__, win64
|
|
mov rcx, rdi
|
|
mov rdx, rsi
|
|
%endif
|
|
%endif
|
|
|
|
; ecx/rcx = ks
|
|
; edx/rdx = data
|
|
|
|
%endmacro
|
|
|
|
|
|
%macro aes_function_exit 0
|
|
|
|
; void (const byte *, byte *);
|
|
|
|
%ifdef MS_STDCALL
|
|
ret 8
|
|
%else
|
|
ret
|
|
%endif
|
|
|
|
%endmacro
|
|
|
|
|
|
%macro push_xmm 2
|
|
sub rsp, 16 * (%2 - %1 + 1)
|
|
|
|
%assign stackoffset 0
|
|
%assign regnumber %1
|
|
|
|
%rep (%2 - %1 + 1)
|
|
movdqu [rsp + 16 * stackoffset], xmm%[regnumber]
|
|
|
|
%assign stackoffset stackoffset+1
|
|
%assign regnumber regnumber+1
|
|
%endrep
|
|
%endmacro
|
|
|
|
|
|
%macro pop_xmm 2
|
|
%assign stackoffset 0
|
|
%assign regnumber %1
|
|
|
|
%rep (%2 - %1 + 1)
|
|
movdqu xmm%[regnumber], [rsp + 16 * stackoffset]
|
|
|
|
%assign stackoffset stackoffset+1
|
|
%assign regnumber regnumber+1
|
|
%endrep
|
|
|
|
add rsp, 16 * (%2 - %1 + 1)
|
|
%endmacro
|
|
|
|
|
|
%macro aes_hw_cpu 2
|
|
%define OPERATION %1
|
|
%define BLOCK_COUNT %2
|
|
|
|
; Load data blocks
|
|
%assign block 1
|
|
%rep BLOCK_COUNT
|
|
movdqu xmm%[block], [%[R]dx + 16 * (block - 1)]
|
|
%assign block block+1
|
|
%endrep
|
|
|
|
; Encrypt/decrypt data blocks
|
|
%assign round 0
|
|
%rep 15
|
|
movdqu xmm0, [%[R]cx + 16 * round]
|
|
|
|
%assign block 1
|
|
%rep BLOCK_COUNT
|
|
|
|
%if round = 0
|
|
pxor xmm%[block], xmm0
|
|
%else
|
|
%if round < 14
|
|
aes%[OPERATION] xmm%[block], xmm0
|
|
%else
|
|
aes%[OPERATION]last xmm%[block], xmm0
|
|
%endif
|
|
%endif
|
|
|
|
%assign block block+1
|
|
%endrep
|
|
|
|
%assign round round+1
|
|
%endrep
|
|
|
|
; Store data blocks
|
|
%assign block 1
|
|
%rep BLOCK_COUNT
|
|
movdqu [%[R]dx + 16 * (block - 1)], xmm%[block]
|
|
%assign block block+1
|
|
%endrep
|
|
|
|
%undef OPERATION
|
|
%undef BLOCK_COUNT
|
|
%endmacro
|
|
|
|
|
|
%macro aes_hw_cpu_32_blocks 1
|
|
%define OPERATION_32_BLOCKS %1
|
|
|
|
%ifidn __BITS__, 64
|
|
%define MAX_REG_BLOCK_COUNT 15
|
|
%else
|
|
%define MAX_REG_BLOCK_COUNT 7
|
|
%endif
|
|
|
|
%ifidn __OUTPUT_FORMAT__, win64
|
|
%if MAX_REG_BLOCK_COUNT > 5
|
|
push_xmm 6, MAX_REG_BLOCK_COUNT
|
|
%endif
|
|
%endif
|
|
|
|
mov eax, 32 / MAX_REG_BLOCK_COUNT
|
|
.1:
|
|
aes_hw_cpu %[OPERATION_32_BLOCKS], MAX_REG_BLOCK_COUNT
|
|
|
|
add %[R]dx, 16 * MAX_REG_BLOCK_COUNT
|
|
dec eax
|
|
jnz .1
|
|
|
|
%if (32 % MAX_REG_BLOCK_COUNT) != 0
|
|
aes_hw_cpu %[OPERATION_32_BLOCKS], (32 % MAX_REG_BLOCK_COUNT)
|
|
%endif
|
|
|
|
%ifidn __OUTPUT_FORMAT__, win64
|
|
%if MAX_REG_BLOCK_COUNT > 5
|
|
pop_xmm 6, MAX_REG_BLOCK_COUNT
|
|
%endif
|
|
%endif
|
|
|
|
%undef OPERATION_32_BLOCKS
|
|
%undef MAX_REG_BLOCK_COUNT
|
|
%endmacro
|
|
|
|
|
|
%ifidn __BITS__, 16
|
|
|
|
USE16
|
|
SEGMENT _TEXT PUBLIC CLASS=CODE USE16
|
|
SEGMENT _DATA PUBLIC CLASS=DATA USE16
|
|
GROUP DGROUP _TEXT _DATA
|
|
SECTION _TEXT
|
|
|
|
%else
|
|
|
|
SECTION .text
|
|
|
|
%endif
|
|
|
|
|
|
; void aes_hw_cpu_enable_sse ();
|
|
|
|
export_function aes_hw_cpu_enable_sse
|
|
mov %[R]ax, cr4
|
|
or ax, 1 << 9
|
|
mov cr4, %[R]ax
|
|
ret
|
|
|
|
|
|
%ifidn __BITS__, 16
|
|
|
|
|
|
; byte is_aes_hw_cpu_supported ();
|
|
|
|
export_function is_aes_hw_cpu_supported
|
|
mov eax, 1
|
|
cpuid
|
|
mov eax, ecx
|
|
shr eax, 25
|
|
and al, 1
|
|
ret
|
|
|
|
|
|
; void aes_hw_cpu_decrypt (const byte *ks, byte *data);
|
|
|
|
export_function aes_hw_cpu_decrypt
|
|
mov ax, -16
|
|
jmp aes_hw_cpu_encrypt_decrypt
|
|
|
|
; void aes_hw_cpu_encrypt (const byte *ks, byte *data);
|
|
|
|
export_function aes_hw_cpu_encrypt
|
|
mov ax, 16
|
|
|
|
aes_hw_cpu_encrypt_decrypt:
|
|
push bp
|
|
mov bp, sp
|
|
push di
|
|
push si
|
|
|
|
mov si, [bp + 4] ; ks
|
|
mov di, [bp + 4 + 2] ; data
|
|
|
|
movdqu xmm0, [si]
|
|
movdqu xmm1, [di]
|
|
|
|
pxor xmm1, xmm0
|
|
|
|
mov cx, 13
|
|
|
|
.round1_13:
|
|
add si, ax
|
|
movdqu xmm0, [si]
|
|
|
|
cmp ax, 0
|
|
jl .decrypt
|
|
|
|
aesenc xmm1, xmm0
|
|
jmp .2
|
|
.decrypt:
|
|
aesdec xmm1, xmm0
|
|
.2:
|
|
loop .round1_13
|
|
|
|
add si, ax
|
|
movdqu xmm0, [si]
|
|
|
|
cmp ax, 0
|
|
jl .decrypt_last
|
|
|
|
aesenclast xmm1, xmm0
|
|
jmp .3
|
|
.decrypt_last:
|
|
aesdeclast xmm1, xmm0
|
|
.3:
|
|
movdqu [di], xmm1
|
|
|
|
pop si
|
|
pop di
|
|
pop bp
|
|
ret
|
|
|
|
|
|
%else ; __BITS__ != 16
|
|
|
|
|
|
; byte is_aes_hw_cpu_supported ();
|
|
|
|
export_function is_aes_hw_cpu_supported
|
|
push %[R]bx
|
|
|
|
mov eax, 1
|
|
cpuid
|
|
mov eax, ecx
|
|
shr eax, 25
|
|
and eax, 1
|
|
|
|
pop %[R]bx
|
|
ret
|
|
|
|
|
|
; void aes_hw_cpu_decrypt (const byte *ks, byte *data);
|
|
|
|
aes_function_entry aes_hw_cpu_decrypt
|
|
aes_hw_cpu dec, 1
|
|
aes_function_exit
|
|
|
|
|
|
; void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data);
|
|
|
|
aes_function_entry aes_hw_cpu_decrypt_32_blocks
|
|
aes_hw_cpu_32_blocks dec
|
|
aes_function_exit
|
|
|
|
|
|
; void aes_hw_cpu_encrypt (const byte *ks, byte *data);
|
|
|
|
aes_function_entry aes_hw_cpu_encrypt
|
|
aes_hw_cpu enc, 1
|
|
aes_function_exit
|
|
|
|
|
|
; void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data);
|
|
|
|
aes_function_entry aes_hw_cpu_encrypt_32_blocks
|
|
aes_hw_cpu_32_blocks enc
|
|
aes_function_exit
|
|
|
|
|
|
%endif ; __BITS__ != 16
|