mirror of
https://github.com/veracrypt/VeraCrypt.git
synced 2026-06-15 09:06:08 -05:00
Windows: Add Win64 unwind metadata for AES assembly
Emit NASM-compatible .pdata/.xdata records for the x64 table AES routines and AES-NI 32-block paths. Describe the nonvolatile GP and XMM6-XMM15 saves so kernel stack unwinding can cross these routines reliably. Gate the metadata on win64 output so ELF and Mach-O builds keep their existing assembly paths.
This commit is contained in:
+98
-36
@@ -68,36 +68,6 @@
|
||||
%endmacro
|
||||
|
||||
|
||||
%macro push_xmm 2
|
||||
sub rsp, 16 * (%2 - %1 + 1)
|
||||
|
||||
%assign stackoffset 0
|
||||
%assign regnumber %1
|
||||
|
||||
%rep (%2 - %1 + 1)
|
||||
movdqu [rsp + 16 * stackoffset], xmm%[regnumber]
|
||||
|
||||
%assign stackoffset stackoffset+1
|
||||
%assign regnumber regnumber+1
|
||||
%endrep
|
||||
%endmacro
|
||||
|
||||
|
||||
%macro pop_xmm 2
|
||||
%assign stackoffset 0
|
||||
%assign regnumber %1
|
||||
|
||||
%rep (%2 - %1 + 1)
|
||||
movdqu xmm%[regnumber], [rsp + 16 * stackoffset]
|
||||
|
||||
%assign stackoffset stackoffset+1
|
||||
%assign regnumber regnumber+1
|
||||
%endrep
|
||||
|
||||
add rsp, 16 * (%2 - %1 + 1)
|
||||
%endmacro
|
||||
|
||||
|
||||
%macro aes_hw_cpu 2
|
||||
%define OPERATION %1
|
||||
%define BLOCK_COUNT %2
|
||||
@@ -145,8 +115,9 @@
|
||||
%endmacro
|
||||
|
||||
|
||||
%macro aes_hw_cpu_32_blocks 1
|
||||
%define OPERATION_32_BLOCKS %1
|
||||
%macro aes_hw_cpu_32_blocks 2
|
||||
%define AES_HW_CPU_32_BLOCKS_NAME %1
|
||||
%define OPERATION_32_BLOCKS %2
|
||||
|
||||
%ifidn __BITS__, 64
|
||||
%define MAX_REG_BLOCK_COUNT 15
|
||||
@@ -156,7 +127,29 @@
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%if MAX_REG_BLOCK_COUNT > 5
|
||||
push_xmm 6, MAX_REG_BLOCK_COUNT
|
||||
sub rsp, 16 * (MAX_REG_BLOCK_COUNT - 6 + 1) + 8
|
||||
AES_HW_CPU_32_BLOCKS_NAME %+ _alloc_end:
|
||||
movdqu [rsp + 16 * 0], xmm6
|
||||
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm6_end:
|
||||
movdqu [rsp + 16 * 1], xmm7
|
||||
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm7_end:
|
||||
movdqu [rsp + 16 * 2], xmm8
|
||||
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm8_end:
|
||||
movdqu [rsp + 16 * 3], xmm9
|
||||
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm9_end:
|
||||
movdqu [rsp + 16 * 4], xmm10
|
||||
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm10_end:
|
||||
movdqu [rsp + 16 * 5], xmm11
|
||||
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm11_end:
|
||||
movdqu [rsp + 16 * 6], xmm12
|
||||
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm12_end:
|
||||
movdqu [rsp + 16 * 7], xmm13
|
||||
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm13_end:
|
||||
movdqu [rsp + 16 * 8], xmm14
|
||||
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm14_end:
|
||||
movdqu [rsp + 16 * 9], xmm15
|
||||
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm15_end:
|
||||
AES_HW_CPU_32_BLOCKS_NAME %+ _prolog_end:
|
||||
%endif
|
||||
%endif
|
||||
|
||||
@@ -174,15 +167,80 @@
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
%if MAX_REG_BLOCK_COUNT > 5
|
||||
pop_xmm 6, MAX_REG_BLOCK_COUNT
|
||||
movdqu xmm6, [rsp + 16 * 0]
|
||||
movdqu xmm7, [rsp + 16 * 1]
|
||||
movdqu xmm8, [rsp + 16 * 2]
|
||||
movdqu xmm9, [rsp + 16 * 3]
|
||||
movdqu xmm10, [rsp + 16 * 4]
|
||||
movdqu xmm11, [rsp + 16 * 5]
|
||||
movdqu xmm12, [rsp + 16 * 6]
|
||||
movdqu xmm13, [rsp + 16 * 7]
|
||||
movdqu xmm14, [rsp + 16 * 8]
|
||||
movdqu xmm15, [rsp + 16 * 9]
|
||||
add rsp, 16 * (MAX_REG_BLOCK_COUNT - 6 + 1) + 8
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%undef OPERATION_32_BLOCKS
|
||||
%undef AES_HW_CPU_32_BLOCKS_NAME
|
||||
%undef MAX_REG_BLOCK_COUNT
|
||||
%endmacro
|
||||
|
||||
|
||||
; Win64 unwind metadata for the 32-block AES-NI routines.
|
||||
;
|
||||
; The records below are hand-encoded and must stay in exact lockstep with the
|
||||
; prologue emitted by aes_hw_cpu_32_blocks: the unwind codes describe the "sub
|
||||
; rsp" allocation followed by the xmm6..xmm15 saves, listed in descending prolog
|
||||
; offset order. The slot count (22 = 10 SAVE_XMM128 pairs + 1 ALLOC_LARGE pair)
|
||||
; and the recorded allocation size are therefore fixed for the win64 /
|
||||
; MAX_REG_BLOCK_COUNT == 15 layout. If that saved-register range or the
|
||||
; allocation ever changes, update the prologue and this table together; a
|
||||
; mismatch makes the OS unwinder mis-restore the caller's context.
|
||||
|
||||
; win64_aesni_32_unwind_info <function label>, <end label>
;
; %1 = label at the start of the routine; the prologue labels %1_alloc_end,
;      %1_save_xmm6_end .. %1_save_xmm15_end and %1_prolog_end must exist.
; %2 = label placed directly after the routine.
;
; Emits one .pdata record and one .xdata unwind table for the routine, then
; switches back to .text. Expands to nothing unless the output format is win64.
%macro win64_aesni_32_unwind_info 2
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
section .pdata rdata align=4
|
||||
align 4
|
||||
dd %1 wrt ..imagebase ; image-relative address of the routine start
|
||||
dd %2 wrt ..imagebase ; image-relative address of the routine end label
|
||||
dd %1 %+ _unwind_info wrt ..imagebase ; image-relative address of the unwind table below
|
||||
|
||||
section .xdata rdata align=8
|
||||
align 4
|
||||
%1 %+ _unwind_info:
|
||||
db 1 ; version 1, no flags
|
||||
db %1 %+ _prolog_end - %1 ; prologue length in bytes
|
||||
db 22 ; unwind code slots: 10 two-slot XMM saves + 1 two-slot allocation
|
||||
db 0 ; no frame register
|
||||
; Unwind codes follow in descending prologue-offset order (last save first).
; Each XMM entry is: prologue offset, (register << 4) | 8 (SAVE_XMM128),
; then a word holding the save slot index in 16-byte units.
db %1 %+ _save_xmm15_end - %1, (15 << 4) | 8
|
||||
dw 9 ; xmm15 stored at [rsp + 16 * 9]
|
||||
db %1 %+ _save_xmm14_end - %1, (14 << 4) | 8
|
||||
dw 8 ; xmm14 stored at [rsp + 16 * 8]
|
||||
db %1 %+ _save_xmm13_end - %1, (13 << 4) | 8
|
||||
dw 7 ; xmm13 stored at [rsp + 16 * 7]
|
||||
db %1 %+ _save_xmm12_end - %1, (12 << 4) | 8
|
||||
dw 6 ; xmm12 stored at [rsp + 16 * 6]
|
||||
db %1 %+ _save_xmm11_end - %1, (11 << 4) | 8
|
||||
dw 5 ; xmm11 stored at [rsp + 16 * 5]
|
||||
db %1 %+ _save_xmm10_end - %1, (10 << 4) | 8
|
||||
dw 4 ; xmm10 stored at [rsp + 16 * 4]
|
||||
db %1 %+ _save_xmm9_end - %1, (9 << 4) | 8
|
||||
dw 3 ; xmm9 stored at [rsp + 16 * 3]
|
||||
db %1 %+ _save_xmm8_end - %1, (8 << 4) | 8
|
||||
dw 2 ; xmm8 stored at [rsp + 16 * 2]
|
||||
db %1 %+ _save_xmm7_end - %1, (7 << 4) | 8
|
||||
dw 1 ; xmm7 stored at [rsp + 16 * 1]
|
||||
db %1 %+ _save_xmm6_end - %1, (6 << 4) | 8
|
||||
dw 0 ; xmm6 stored at [rsp + 16 * 0]
|
||||
db %1 %+ _alloc_end - %1, 1 ; ALLOC_LARGE, size carried in the next slot
|
||||
dw (16 * (15 - 6 + 1) + 8) / 8 ; "sub rsp" amount (ten 16-byte slots + 8) in 8-byte units
|
||||
|
||||
section .text
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
|
||||
%ifidn __BITS__, 16
|
||||
|
||||
USE16
|
||||
@@ -312,8 +370,10 @@
|
||||
; void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data);
|
||||
|
||||
aes_function_entry aes_hw_cpu_decrypt_32_blocks
|
||||
aes_hw_cpu_32_blocks dec
|
||||
aes_hw_cpu_32_blocks aes_hw_cpu_decrypt_32_blocks, dec
|
||||
aes_function_exit
|
||||
aes_hw_cpu_decrypt_32_blocks_end:
|
||||
win64_aesni_32_unwind_info aes_hw_cpu_decrypt_32_blocks, aes_hw_cpu_decrypt_32_blocks_end
|
||||
|
||||
|
||||
; void aes_hw_cpu_encrypt (const byte *ks, byte *data);
|
||||
@@ -326,8 +386,10 @@
|
||||
; void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data);
|
||||
|
||||
aes_function_entry aes_hw_cpu_encrypt_32_blocks
|
||||
aes_hw_cpu_32_blocks enc
|
||||
aes_hw_cpu_32_blocks aes_hw_cpu_encrypt_32_blocks, enc
|
||||
aes_function_exit
|
||||
aes_hw_cpu_encrypt_32_blocks_end:
|
||||
win64_aesni_32_unwind_info aes_hw_cpu_encrypt_32_blocks, aes_hw_cpu_encrypt_32_blocks_end
|
||||
|
||||
|
||||
%endif ; __BITS__ != 16
|
||||
|
||||
+100
-39
@@ -55,8 +55,8 @@
|
||||
; The Windows calling convention is used by default; the gnu/linux convention
|
||||
; is used instead if __GNUC__ is defined.
|
||||
;
|
||||
; Define _SEH_ to include support for Win64 structured exception handling
|
||||
; (this requires YASM version 0.6 or later).
|
||||
; Win64 unwind metadata is emitted explicitly in .pdata/.xdata when this file
|
||||
; is assembled as a PE32+ object.
|
||||
;
|
||||
; This code provides the standard AES block size (128 bits, 16 bytes) and the
|
||||
; three standard AES key sizes (128, 192 and 256 bits). It has the same call
|
||||
@@ -673,6 +673,32 @@
|
||||
|
||||
%endif
|
||||
|
||||
; win64_aes_unwind_info <function label>, <end label>
;
; %1 = label at the start of the routine; its prologue must define the local
;      labels .save_rsi_end, .save_rdi_end, .save_rbx_end, .save_rbp_end,
;      .save_r12_end, .alloc_end and .prolog_end.
; %2 = label placed directly after the routine.
;
; Emits one .pdata record and one .xdata unwind table for the routine, then
; switches back to .text. Expands to nothing unless the output format is win64.
; The unwind codes are listed in descending prologue-offset order, i.e. the
; reverse of the push/sub sequence they describe.
%macro win64_aes_unwind_info 2
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
section .pdata rdata align=4
|
||||
align 4
|
||||
dd %1 wrt ..imagebase ; image-relative address of the routine start
|
||||
dd %2 wrt ..imagebase ; image-relative address of the routine end label
|
||||
dd %1 %+ _unwind_info wrt ..imagebase ; image-relative address of the unwind table below
|
||||
|
||||
section .xdata rdata align=8
|
||||
align 4
|
||||
%1 %+ _unwind_info:
|
||||
db 1 ; version 1, no flags
|
||||
db %1 %+ .prolog_end - %1 ; prologue length in bytes
|
||||
db 6 ; unwind code slots
|
||||
db 0 ; no frame register
|
||||
db %1 %+ .alloc_end - %1, 2 ; UWOP_ALLOC_SMALL, 8 bytes
|
||||
db %1 %+ .save_r12_end - %1, (12 << 4) | 0 ; UWOP_PUSH_NONVOL r12
|
||||
db %1 %+ .save_rbp_end - %1, (5 << 4) | 0 ; UWOP_PUSH_NONVOL rbp
|
||||
db %1 %+ .save_rbx_end - %1, (3 << 4) | 0 ; UWOP_PUSH_NONVOL rbx
|
||||
db %1 %+ .save_rdi_end - %1, (7 << 4) | 0 ; UWOP_PUSH_NONVOL rdi
|
||||
db %1 %+ .save_rsi_end - %1, (6 << 4) | 0 ; UWOP_PUSH_NONVOL rsi
|
||||
|
||||
section .text align=16
|
||||
%endif
|
||||
%endmacro
|
||||
|
||||
%ifdef ENCRYPTION
|
||||
|
||||
global aes_encrypt
|
||||
@@ -691,19 +717,24 @@ enc_tab:
|
||||
section .text align=16
|
||||
align 16
|
||||
|
||||
%ifdef _SEH_
|
||||
proc_frame aes_encrypt
|
||||
alloc_stack 7*8 ; 7 to align stack to 16 bytes
|
||||
save_reg rsi,4*8
|
||||
save_reg rdi,5*8
|
||||
save_reg rbx,1*8
|
||||
save_reg rbp,2*8
|
||||
save_reg r12,3*8
|
||||
end_prologue
|
||||
mov rdi, rcx ; input pointer
|
||||
mov [rsp+0*8], rdx ; output pointer
|
||||
%else
|
||||
aes_encrypt:
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
push rsi
|
||||
.save_rsi_end:
|
||||
push rdi
|
||||
.save_rdi_end:
|
||||
push rbx
|
||||
.save_rbx_end:
|
||||
push rbp
|
||||
.save_rbp_end:
|
||||
push r12
|
||||
.save_r12_end:
|
||||
sub rsp, 8
|
||||
.alloc_end:
|
||||
mov rdi, rcx ; input pointer
|
||||
mov [rsp], rdx ; output pointer
|
||||
.prolog_end:
|
||||
%else
|
||||
%ifdef __GNUC__
|
||||
sub rsp, 4*8 ; gnu/linux binary interface
|
||||
mov [rsp+0*8], rsi ; output pointer
|
||||
@@ -766,25 +797,37 @@ end_prologue
|
||||
mov [rbx+12], r12d
|
||||
xor rax, rax
|
||||
.4:
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
add rsp, 8
|
||||
pop r12
|
||||
pop rbp
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
ret
|
||||
%else
|
||||
%ifdef __GNUC__
|
||||
mov rbx, [rsp+1*8]
|
||||
mov rbp, [rsp+2*8]
|
||||
mov r12, [rsp+3*8]
|
||||
%ifdef __GNUC__
|
||||
add rsp, 4*8
|
||||
ret
|
||||
%else
|
||||
mov rbx, [rsp+1*8]
|
||||
mov rbp, [rsp+2*8]
|
||||
mov r12, [rsp+3*8]
|
||||
mov rsi, [rsp+4*8]
|
||||
mov rdi, [rsp+5*8]
|
||||
%ifdef _SEH_
|
||||
add rsp, 7*8
|
||||
ret
|
||||
endproc_frame
|
||||
%else
|
||||
add rsp, 6*8
|
||||
ret
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
aes_encrypt_end:
|
||||
win64_aes_unwind_info aes_encrypt, aes_encrypt_end
|
||||
%endif
|
||||
|
||||
%endif
|
||||
|
||||
%ifdef DECRYPTION
|
||||
@@ -805,19 +848,24 @@ dec_tab:
|
||||
section .text
|
||||
align 16
|
||||
|
||||
%ifdef _SEH_
|
||||
proc_frame aes_decrypt
|
||||
alloc_stack 7*8 ; 7 to align stack to 16 bytes
|
||||
save_reg rsi,4*8
|
||||
save_reg rdi,5*8
|
||||
save_reg rbx,1*8
|
||||
save_reg rbp,2*8
|
||||
save_reg r12,3*8
|
||||
end_prologue
|
||||
mov rdi, rcx ; input pointer
|
||||
mov [rsp+0*8], rdx ; output pointer
|
||||
%else
|
||||
aes_decrypt:
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
push rsi
|
||||
.save_rsi_end:
|
||||
push rdi
|
||||
.save_rdi_end:
|
||||
push rbx
|
||||
.save_rbx_end:
|
||||
push rbp
|
||||
.save_rbp_end:
|
||||
push r12
|
||||
.save_r12_end:
|
||||
sub rsp, 8
|
||||
.alloc_end:
|
||||
mov rdi, rcx ; input pointer
|
||||
mov [rsp], rdx ; output pointer
|
||||
.prolog_end:
|
||||
%else
|
||||
%ifdef __GNUC__
|
||||
sub rsp, 4*8 ; gnu/linux binary interface
|
||||
mov [rsp+0*8], rsi ; output pointer
|
||||
@@ -885,25 +933,38 @@ end_prologue
|
||||
mov [rbx+8], r11d
|
||||
mov [rbx+12], r12d
|
||||
xor rax, rax
|
||||
.4: mov rbx, [rsp+1*8]
|
||||
.4:
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
add rsp, 8
|
||||
pop r12
|
||||
pop rbp
|
||||
pop rbx
|
||||
pop rdi
|
||||
pop rsi
|
||||
ret
|
||||
%else
|
||||
%ifdef __GNUC__
|
||||
mov rbx, [rsp+1*8]
|
||||
mov rbp, [rsp+2*8]
|
||||
mov r12, [rsp+3*8]
|
||||
%ifdef __GNUC__
|
||||
add rsp, 4*8
|
||||
ret
|
||||
%else
|
||||
mov rbx, [rsp+1*8]
|
||||
mov rbp, [rsp+2*8]
|
||||
mov r12, [rsp+3*8]
|
||||
mov rsi, [rsp+4*8]
|
||||
mov rdi, [rsp+5*8]
|
||||
%ifdef _SEH_
|
||||
add rsp, 7*8
|
||||
ret
|
||||
endproc_frame
|
||||
%else
|
||||
add rsp, 6*8
|
||||
ret
|
||||
%endif
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__, win64
|
||||
aes_decrypt_end:
|
||||
win64_aes_unwind_info aes_decrypt, aes_decrypt_end
|
||||
%endif
|
||||
|
||||
%endif
|
||||
|
||||
%ifidn __OUTPUT_FORMAT__,elf
|
||||
|
||||
Reference in New Issue
Block a user