mirror of
https://github.com/veracrypt/VeraCrypt.git
synced 2026-06-15 09:06:08 -05:00
Windows: Add Win64 unwind metadata for AES assembly
Emit NASM-compatible .pdata/.xdata records for the x64 table AES routines and AES-NI 32-block paths. Describe the nonvolatile GP and XMM6-XMM15 saves so kernel stack unwinding can cross these routines reliably. Gate the metadata on win64 output so ELF and Mach-O builds keep their existing assembly paths.
This commit is contained in:
+98
-36
@@ -68,36 +68,6 @@
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
|
||||||
; push_xmm first, last
;   Saves the XMM registers numbered first..last (inclusive) on the stack.
;   Parameter 1 is the number of the first register, parameter 2 the number
;   of the last one.
;   Lowers rsp by 16 bytes per register, then stores the registers into
;   consecutive 16-byte slots, the lowest-numbered register at [rsp].
;   The stores use movdqu, so no slot alignment is required.
;   Redefines the preprocessor variables stackoffset and regnumber.
%macro push_xmm 2
|
|
||||||
sub rsp, 16 * (%2 - %1 + 1)    ; reserve one 16-byte slot per register
|
|
||||||
|
|
||||||
%assign stackoffset 0    ; index of the slot being written
|
|
||||||
%assign regnumber %1    ; number of the register being saved
|
|
||||||
|
|
||||||
%rep (%2 - %1 + 1)    ; one store per register in the range
|
|
||||||
movdqu [rsp + 16 * stackoffset], xmm%[regnumber]
|
|
||||||
|
|
||||||
%assign stackoffset stackoffset+1
|
|
||||||
%assign regnumber regnumber+1
|
|
||||||
%endrep
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
|
|
||||||
; pop_xmm first, last
;   Reloads the XMM registers numbered first..last (inclusive) from
;   consecutive 16-byte slots starting at [rsp], the lowest-numbered register
;   from [rsp], and then raises rsp by 16 bytes per register.
;   Parameter 1 is the number of the first register, parameter 2 the number
;   of the last one; the slot layout is the one written by push_xmm when it
;   is given the same arguments.
;   Redefines the preprocessor variables stackoffset and regnumber.
%macro pop_xmm 2
|
|
||||||
%assign stackoffset 0    ; index of the slot being read
|
|
||||||
%assign regnumber %1    ; number of the register being restored
|
|
||||||
|
|
||||||
%rep (%2 - %1 + 1)    ; one load per register in the range
|
|
||||||
movdqu xmm%[regnumber], [rsp + 16 * stackoffset]
|
|
||||||
|
|
||||||
%assign stackoffset stackoffset+1
|
|
||||||
%assign regnumber regnumber+1
|
|
||||||
%endrep
|
|
||||||
|
|
||||||
add rsp, 16 * (%2 - %1 + 1)    ; release the save area
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
|
|
||||||
%macro aes_hw_cpu 2
|
%macro aes_hw_cpu 2
|
||||||
%define OPERATION %1
|
%define OPERATION %1
|
||||||
%define BLOCK_COUNT %2
|
%define BLOCK_COUNT %2
|
||||||
@@ -145,8 +115,9 @@
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
|
||||||
%macro aes_hw_cpu_32_blocks 1
|
%macro aes_hw_cpu_32_blocks 2
|
||||||
%define OPERATION_32_BLOCKS %1
|
%define AES_HW_CPU_32_BLOCKS_NAME %1
|
||||||
|
%define OPERATION_32_BLOCKS %2
|
||||||
|
|
||||||
%ifidn __BITS__, 64
|
%ifidn __BITS__, 64
|
||||||
%define MAX_REG_BLOCK_COUNT 15
|
%define MAX_REG_BLOCK_COUNT 15
|
||||||
@@ -156,7 +127,29 @@
|
|||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%if MAX_REG_BLOCK_COUNT > 5
|
%if MAX_REG_BLOCK_COUNT > 5
|
||||||
push_xmm 6, MAX_REG_BLOCK_COUNT
|
sub rsp, 16 * (MAX_REG_BLOCK_COUNT - 6 + 1) + 8
|
||||||
|
AES_HW_CPU_32_BLOCKS_NAME %+ _alloc_end:
|
||||||
|
movdqu [rsp + 16 * 0], xmm6
|
||||||
|
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm6_end:
|
||||||
|
movdqu [rsp + 16 * 1], xmm7
|
||||||
|
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm7_end:
|
||||||
|
movdqu [rsp + 16 * 2], xmm8
|
||||||
|
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm8_end:
|
||||||
|
movdqu [rsp + 16 * 3], xmm9
|
||||||
|
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm9_end:
|
||||||
|
movdqu [rsp + 16 * 4], xmm10
|
||||||
|
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm10_end:
|
||||||
|
movdqu [rsp + 16 * 5], xmm11
|
||||||
|
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm11_end:
|
||||||
|
movdqu [rsp + 16 * 6], xmm12
|
||||||
|
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm12_end:
|
||||||
|
movdqu [rsp + 16 * 7], xmm13
|
||||||
|
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm13_end:
|
||||||
|
movdqu [rsp + 16 * 8], xmm14
|
||||||
|
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm14_end:
|
||||||
|
movdqu [rsp + 16 * 9], xmm15
|
||||||
|
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm15_end:
|
||||||
|
AES_HW_CPU_32_BLOCKS_NAME %+ _prolog_end:
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
@@ -174,15 +167,80 @@
|
|||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__, win64
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
%if MAX_REG_BLOCK_COUNT > 5
|
%if MAX_REG_BLOCK_COUNT > 5
|
||||||
pop_xmm 6, MAX_REG_BLOCK_COUNT
|
movdqu xmm6, [rsp + 16 * 0]
|
||||||
|
movdqu xmm7, [rsp + 16 * 1]
|
||||||
|
movdqu xmm8, [rsp + 16 * 2]
|
||||||
|
movdqu xmm9, [rsp + 16 * 3]
|
||||||
|
movdqu xmm10, [rsp + 16 * 4]
|
||||||
|
movdqu xmm11, [rsp + 16 * 5]
|
||||||
|
movdqu xmm12, [rsp + 16 * 6]
|
||||||
|
movdqu xmm13, [rsp + 16 * 7]
|
||||||
|
movdqu xmm14, [rsp + 16 * 8]
|
||||||
|
movdqu xmm15, [rsp + 16 * 9]
|
||||||
|
add rsp, 16 * (MAX_REG_BLOCK_COUNT - 6 + 1) + 8
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%undef OPERATION_32_BLOCKS
|
%undef OPERATION_32_BLOCKS
|
||||||
|
%undef AES_HW_CPU_32_BLOCKS_NAME
|
||||||
%undef MAX_REG_BLOCK_COUNT
|
%undef MAX_REG_BLOCK_COUNT
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
|
||||||
|
; Win64 unwind metadata for the 32-block AES-NI routines.
|
||||||
|
;
|
||||||
|
; The records below are hand-encoded and must stay in exact lockstep with the
|
||||||
|
; prologue emitted by aes_hw_cpu_32_blocks: the unwind codes describe the "sub
|
||||||
|
; rsp" allocation followed by the xmm6..xmm15 saves, listed in descending prolog
|
||||||
|
; offset order. The slot count (22 = 10 SAVE_XMM128 pairs + 1 ALLOC_LARGE pair)
|
||||||
|
; and the recorded allocation size are therefore fixed for the win64 /
|
||||||
|
; MAX_REG_BLOCK_COUNT == 15 layout. If that saved-register range or the
|
||||||
|
; allocation ever changes, update the prologue and this table together; a
|
||||||
|
; mismatch makes the OS unwinder mis-restore the caller's context.
|
||||||
|
|
||||||
|
; win64_aesni_32_unwind_info function, function_end
;   Parameter 1: entry label of the routine. It is also used as the prefix of
;                the prologue marker labels this macro references
;                (<function>_alloc_end, <function>_save_xmm6_end ..
;                <function>_save_xmm15_end, <function>_prolog_end) and of the
;                <function>_unwind_info label this macro defines.
;   Parameter 2: label placed directly after the routine's last instruction.
;   Emits nothing unless the output format is win64. Otherwise it appends one
;   function table entry to .pdata and one unwind record to .xdata, then
;   switches back to .text.
;   Every unwind code starts with the prologue offset at which the described
;   instruction has completed, followed by (operation info << 4) | operation.
%macro win64_aesni_32_unwind_info 2
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
section .pdata rdata align=4    ; function table: start, end, unwind record
|
||||||
|
align 4
|
||||||
|
dd %1 wrt ..imagebase    ; image-relative address of the routine start
|
||||||
|
dd %2 wrt ..imagebase    ; image-relative address of the routine end
|
||||||
|
dd %1 %+ _unwind_info wrt ..imagebase    ; image-relative address of the record below
|
||||||
|
|
||||||
|
section .xdata rdata align=8
|
||||||
|
align 4
|
||||||
|
%1 %+ _unwind_info:
|
||||||
|
db 1    ; version 1, no flags
|
||||||
|
db %1 %+ _prolog_end - %1    ; prologue size in bytes
|
||||||
|
db 22    ; unwind code slots: 10 two-slot xmm saves + 1 two-slot allocation
|
||||||
|
db 0    ; no frame register
|
||||||
|
db %1 %+ _save_xmm15_end - %1, (15 << 4) | 8    ; UWOP_SAVE_XMM128 xmm15
|
||||||
|
dw 9    ; saved at [rsp + 16 * 9]
|
||||||
|
db %1 %+ _save_xmm14_end - %1, (14 << 4) | 8    ; UWOP_SAVE_XMM128 xmm14
|
||||||
|
dw 8    ; saved at [rsp + 16 * 8]
|
||||||
|
db %1 %+ _save_xmm13_end - %1, (13 << 4) | 8    ; UWOP_SAVE_XMM128 xmm13
|
||||||
|
dw 7    ; saved at [rsp + 16 * 7]
|
||||||
|
db %1 %+ _save_xmm12_end - %1, (12 << 4) | 8    ; UWOP_SAVE_XMM128 xmm12
|
||||||
|
dw 6    ; saved at [rsp + 16 * 6]
|
||||||
|
db %1 %+ _save_xmm11_end - %1, (11 << 4) | 8    ; UWOP_SAVE_XMM128 xmm11
|
||||||
|
dw 5    ; saved at [rsp + 16 * 5]
|
||||||
|
db %1 %+ _save_xmm10_end - %1, (10 << 4) | 8    ; UWOP_SAVE_XMM128 xmm10
|
||||||
|
dw 4    ; saved at [rsp + 16 * 4]
|
||||||
|
db %1 %+ _save_xmm9_end - %1, (9 << 4) | 8    ; UWOP_SAVE_XMM128 xmm9
|
||||||
|
dw 3    ; saved at [rsp + 16 * 3]
|
||||||
|
db %1 %+ _save_xmm8_end - %1, (8 << 4) | 8    ; UWOP_SAVE_XMM128 xmm8
|
||||||
|
dw 2    ; saved at [rsp + 16 * 2]
|
||||||
|
db %1 %+ _save_xmm7_end - %1, (7 << 4) | 8    ; UWOP_SAVE_XMM128 xmm7
|
||||||
|
dw 1    ; saved at [rsp + 16 * 1]
|
||||||
|
db %1 %+ _save_xmm6_end - %1, (6 << 4) | 8    ; UWOP_SAVE_XMM128 xmm6
|
||||||
|
dw 0    ; saved at [rsp + 16 * 0]
|
||||||
|
db %1 %+ _alloc_end - %1, 1    ; UWOP_ALLOC_LARGE, size carried in the next slot
|
||||||
|
dw (16 * (15 - 6 + 1) + 8) / 8    ; allocation size in 8-byte units (168 bytes -> 21)
|
||||||
|
|
||||||
|
section .text    ; later code goes back into the text section
|
||||||
|
%endif
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
|
||||||
%ifidn __BITS__, 16
|
%ifidn __BITS__, 16
|
||||||
|
|
||||||
USE16
|
USE16
|
||||||
@@ -312,8 +370,10 @@
|
|||||||
; void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data);
|
; void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data);
|
||||||
|
|
||||||
aes_function_entry aes_hw_cpu_decrypt_32_blocks
|
aes_function_entry aes_hw_cpu_decrypt_32_blocks
|
||||||
aes_hw_cpu_32_blocks dec
|
aes_hw_cpu_32_blocks aes_hw_cpu_decrypt_32_blocks, dec
|
||||||
aes_function_exit
|
aes_function_exit
|
||||||
|
aes_hw_cpu_decrypt_32_blocks_end:
|
||||||
|
win64_aesni_32_unwind_info aes_hw_cpu_decrypt_32_blocks, aes_hw_cpu_decrypt_32_blocks_end
|
||||||
|
|
||||||
|
|
||||||
; void aes_hw_cpu_encrypt (const byte *ks, byte *data);
|
; void aes_hw_cpu_encrypt (const byte *ks, byte *data);
|
||||||
@@ -326,8 +386,10 @@
|
|||||||
; void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data);
|
; void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data);
|
||||||
|
|
||||||
aes_function_entry aes_hw_cpu_encrypt_32_blocks
|
aes_function_entry aes_hw_cpu_encrypt_32_blocks
|
||||||
aes_hw_cpu_32_blocks enc
|
aes_hw_cpu_32_blocks aes_hw_cpu_encrypt_32_blocks, enc
|
||||||
aes_function_exit
|
aes_function_exit
|
||||||
|
aes_hw_cpu_encrypt_32_blocks_end:
|
||||||
|
win64_aesni_32_unwind_info aes_hw_cpu_encrypt_32_blocks, aes_hw_cpu_encrypt_32_blocks_end
|
||||||
|
|
||||||
|
|
||||||
%endif ; __BITS__ != 16
|
%endif ; __BITS__ != 16
|
||||||
|
|||||||
+100
-39
@@ -55,8 +55,8 @@
|
|||||||
; The default calling convention is the Windows one; the GNU/Linux convention
|
; The default calling convention is the Windows one; the GNU/Linux convention
|
||||||
; is used instead if __GNUC__ is defined.
|
; is used instead if __GNUC__ is defined.
|
||||||
;
|
;
|
||||||
; Define _SEH_ to include support for Win64 structured exception handling
|
; Win64 unwind metadata is emitted explicitly in .pdata/.xdata when this file
|
||||||
; (this requires YASM version 0.6 or later).
|
; is assembled as a PE32+ object.
|
||||||
;
|
;
|
||||||
; This code provides the standard AES block size (128 bits, 16 bytes) and the
|
; This code provides the standard AES block size (128 bits, 16 bytes) and the
|
||||||
; three standard AES key sizes (128, 192 and 256 bits). It has the same call
|
; three standard AES key sizes (128, 192 and 256 bits). It has the same call
|
||||||
@@ -673,6 +673,32 @@
|
|||||||
|
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
; win64_aes_unwind_info function, function_end
;   Parameter 1: entry label of the routine. Its prologue must define the
;                local labels .save_rsi_end, .save_rdi_end, .save_rbx_end,
;                .save_rbp_end, .save_r12_end, .alloc_end and .prolog_end,
;                because this macro references each of them. The macro also
;                defines the label <function>_unwind_info.
;   Parameter 2: label placed directly after the routine's last instruction.
;   Emits nothing unless the output format is win64. Otherwise it appends one
;   function table entry to .pdata and one unwind record to .xdata, then
;   switches back to .text.
;   The unwind codes describe five register pushes followed by an 8-byte
;   stack allocation, listed from the last prologue operation back to the
;   first. Each code is one slot: the prologue offset at which the
;   instruction has completed, then (operation info << 4) | operation.
%macro win64_aes_unwind_info 2
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
section .pdata rdata align=4    ; function table: start, end, unwind record
|
||||||
|
align 4
|
||||||
|
dd %1 wrt ..imagebase    ; image-relative address of the routine start
|
||||||
|
dd %2 wrt ..imagebase    ; image-relative address of the routine end
|
||||||
|
dd %1 %+ _unwind_info wrt ..imagebase    ; image-relative address of the record below
|
||||||
|
|
||||||
|
section .xdata rdata align=8
|
||||||
|
align 4
|
||||||
|
%1 %+ _unwind_info:
|
||||||
|
db 1 ; version 1, no flags
|
||||||
|
db %1 %+ .prolog_end - %1    ; prologue size in bytes, measured up to .prolog_end
|
||||||
|
db 6 ; unwind code slots: 1 allocation + 5 pushes
|
||||||
|
db 0 ; no frame register
|
||||||
|
db %1 %+ .alloc_end - %1, 2 ; UWOP_ALLOC_SMALL, 8 bytes (operation info 0)
|
||||||
|
db %1 %+ .save_r12_end - %1, (12 << 4) | 0 ; UWOP_PUSH_NONVOL r12
|
||||||
|
db %1 %+ .save_rbp_end - %1, (5 << 4) | 0 ; UWOP_PUSH_NONVOL rbp
|
||||||
|
db %1 %+ .save_rbx_end - %1, (3 << 4) | 0 ; UWOP_PUSH_NONVOL rbx
|
||||||
|
db %1 %+ .save_rdi_end - %1, (7 << 4) | 0 ; UWOP_PUSH_NONVOL rdi
|
||||||
|
db %1 %+ .save_rsi_end - %1, (6 << 4) | 0 ; UWOP_PUSH_NONVOL rsi
|
||||||
|
|
||||||
|
section .text align=16    ; later code goes back into the text section
|
||||||
|
%endif
|
||||||
|
%endmacro
|
||||||
|
|
||||||
%ifdef ENCRYPTION
|
%ifdef ENCRYPTION
|
||||||
|
|
||||||
global aes_encrypt
|
global aes_encrypt
|
||||||
@@ -691,19 +717,24 @@ enc_tab:
|
|||||||
section .text align=16
|
section .text align=16
|
||||||
align 16
|
align 16
|
||||||
|
|
||||||
%ifdef _SEH_
|
|
||||||
proc_frame aes_encrypt
|
|
||||||
alloc_stack 7*8 ; 7 to align stack to 16 bytes
|
|
||||||
save_reg rsi,4*8
|
|
||||||
save_reg rdi,5*8
|
|
||||||
save_reg rbx,1*8
|
|
||||||
save_reg rbp,2*8
|
|
||||||
save_reg r12,3*8
|
|
||||||
end_prologue
|
|
||||||
mov rdi, rcx ; input pointer
|
|
||||||
mov [rsp+0*8], rdx ; output pointer
|
|
||||||
%else
|
|
||||||
aes_encrypt:
|
aes_encrypt:
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
push rsi
|
||||||
|
.save_rsi_end:
|
||||||
|
push rdi
|
||||||
|
.save_rdi_end:
|
||||||
|
push rbx
|
||||||
|
.save_rbx_end:
|
||||||
|
push rbp
|
||||||
|
.save_rbp_end:
|
||||||
|
push r12
|
||||||
|
.save_r12_end:
|
||||||
|
sub rsp, 8
|
||||||
|
.alloc_end:
|
||||||
|
mov rdi, rcx ; input pointer
|
||||||
|
mov [rsp], rdx ; output pointer
|
||||||
|
.prolog_end:
|
||||||
|
%else
|
||||||
%ifdef __GNUC__
|
%ifdef __GNUC__
|
||||||
sub rsp, 4*8 ; gnu/linux binary interface
|
sub rsp, 4*8 ; gnu/linux binary interface
|
||||||
mov [rsp+0*8], rsi ; output pointer
|
mov [rsp+0*8], rsi ; output pointer
|
||||||
@@ -766,25 +797,37 @@ end_prologue
|
|||||||
mov [rbx+12], r12d
|
mov [rbx+12], r12d
|
||||||
xor rax, rax
|
xor rax, rax
|
||||||
.4:
|
.4:
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
add rsp, 8
|
||||||
|
pop r12
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
ret
|
||||||
|
%else
|
||||||
|
%ifdef __GNUC__
|
||||||
mov rbx, [rsp+1*8]
|
mov rbx, [rsp+1*8]
|
||||||
mov rbp, [rsp+2*8]
|
mov rbp, [rsp+2*8]
|
||||||
mov r12, [rsp+3*8]
|
mov r12, [rsp+3*8]
|
||||||
%ifdef __GNUC__
|
|
||||||
add rsp, 4*8
|
add rsp, 4*8
|
||||||
ret
|
ret
|
||||||
%else
|
%else
|
||||||
|
mov rbx, [rsp+1*8]
|
||||||
|
mov rbp, [rsp+2*8]
|
||||||
|
mov r12, [rsp+3*8]
|
||||||
mov rsi, [rsp+4*8]
|
mov rsi, [rsp+4*8]
|
||||||
mov rdi, [rsp+5*8]
|
mov rdi, [rsp+5*8]
|
||||||
%ifdef _SEH_
|
|
||||||
add rsp, 7*8
|
|
||||||
ret
|
|
||||||
endproc_frame
|
|
||||||
%else
|
|
||||||
add rsp, 6*8
|
add rsp, 6*8
|
||||||
ret
|
ret
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
aes_encrypt_end:
|
||||||
|
win64_aes_unwind_info aes_encrypt, aes_encrypt_end
|
||||||
|
%endif
|
||||||
|
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%ifdef DECRYPTION
|
%ifdef DECRYPTION
|
||||||
@@ -805,19 +848,24 @@ dec_tab:
|
|||||||
section .text
|
section .text
|
||||||
align 16
|
align 16
|
||||||
|
|
||||||
%ifdef _SEH_
|
|
||||||
proc_frame aes_decrypt
|
|
||||||
alloc_stack 7*8 ; 7 to align stack to 16 bytes
|
|
||||||
save_reg rsi,4*8
|
|
||||||
save_reg rdi,5*8
|
|
||||||
save_reg rbx,1*8
|
|
||||||
save_reg rbp,2*8
|
|
||||||
save_reg r12,3*8
|
|
||||||
end_prologue
|
|
||||||
mov rdi, rcx ; input pointer
|
|
||||||
mov [rsp+0*8], rdx ; output pointer
|
|
||||||
%else
|
|
||||||
aes_decrypt:
|
aes_decrypt:
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
push rsi
|
||||||
|
.save_rsi_end:
|
||||||
|
push rdi
|
||||||
|
.save_rdi_end:
|
||||||
|
push rbx
|
||||||
|
.save_rbx_end:
|
||||||
|
push rbp
|
||||||
|
.save_rbp_end:
|
||||||
|
push r12
|
||||||
|
.save_r12_end:
|
||||||
|
sub rsp, 8
|
||||||
|
.alloc_end:
|
||||||
|
mov rdi, rcx ; input pointer
|
||||||
|
mov [rsp], rdx ; output pointer
|
||||||
|
.prolog_end:
|
||||||
|
%else
|
||||||
%ifdef __GNUC__
|
%ifdef __GNUC__
|
||||||
sub rsp, 4*8 ; gnu/linux binary interface
|
sub rsp, 4*8 ; gnu/linux binary interface
|
||||||
mov [rsp+0*8], rsi ; output pointer
|
mov [rsp+0*8], rsi ; output pointer
|
||||||
@@ -885,25 +933,38 @@ end_prologue
|
|||||||
mov [rbx+8], r11d
|
mov [rbx+8], r11d
|
||||||
mov [rbx+12], r12d
|
mov [rbx+12], r12d
|
||||||
xor rax, rax
|
xor rax, rax
|
||||||
.4: mov rbx, [rsp+1*8]
|
.4:
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
add rsp, 8
|
||||||
|
pop r12
|
||||||
|
pop rbp
|
||||||
|
pop rbx
|
||||||
|
pop rdi
|
||||||
|
pop rsi
|
||||||
|
ret
|
||||||
|
%else
|
||||||
|
%ifdef __GNUC__
|
||||||
|
mov rbx, [rsp+1*8]
|
||||||
mov rbp, [rsp+2*8]
|
mov rbp, [rsp+2*8]
|
||||||
mov r12, [rsp+3*8]
|
mov r12, [rsp+3*8]
|
||||||
%ifdef __GNUC__
|
|
||||||
add rsp, 4*8
|
add rsp, 4*8
|
||||||
ret
|
ret
|
||||||
%else
|
%else
|
||||||
|
mov rbx, [rsp+1*8]
|
||||||
|
mov rbp, [rsp+2*8]
|
||||||
|
mov r12, [rsp+3*8]
|
||||||
mov rsi, [rsp+4*8]
|
mov rsi, [rsp+4*8]
|
||||||
mov rdi, [rsp+5*8]
|
mov rdi, [rsp+5*8]
|
||||||
%ifdef _SEH_
|
|
||||||
add rsp, 7*8
|
|
||||||
ret
|
|
||||||
endproc_frame
|
|
||||||
%else
|
|
||||||
add rsp, 6*8
|
add rsp, 6*8
|
||||||
ret
|
ret
|
||||||
%endif
|
%endif
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%ifidn __OUTPUT_FORMAT__, win64
|
||||||
|
aes_decrypt_end:
|
||||||
|
win64_aes_unwind_info aes_decrypt, aes_decrypt_end
|
||||||
|
%endif
|
||||||
|
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
%ifidn __OUTPUT_FORMAT__,elf
|
%ifidn __OUTPUT_FORMAT__,elf
|
||||||
|
|||||||
Reference in New Issue
Block a user