1
0
mirror of https://github.com/veracrypt/VeraCrypt.git synced 2026-06-15 09:06:08 -05:00

Windows: Add Win64 unwind metadata for AES assembly

Emit NASM-compatible .pdata/.xdata records for the x64 table AES routines and AES-NI 32-block paths.

Describe the nonvolatile GP and XMM6-XMM15 saves so kernel stack unwinding can cross these routines reliably.

Gate the metadata on win64 output so ELF and Mach-O builds keep their existing assembly paths.
This commit is contained in:
Mounir IDRASSI
2026-06-03 23:44:39 +09:00
parent a24cbe55bd
commit 7f905395c6
2 changed files with 210 additions and 87 deletions
+98 -36
View File
@@ -68,36 +68,6 @@
%endmacro
; push_xmm first, last
;   Reserve 16 bytes of stack per register and store xmm<first>..xmm<last>
;   there, with xmm<first> at [rsp] and each following register 16 bytes
;   higher. The stores are movdqu, so rsp needs no particular alignment.
;   Side effect: (re)assigns the preprocessor variables stackoffset and
;   regnumber.
%macro push_xmm 2
        sub     rsp, (%2 - %1 + 1) * 16
    %assign regnumber %1
    %assign stackoffset 0
    %rep (%2 - %1 + 1)
        movdqu  [rsp + stackoffset * 16], xmm%[regnumber]
        %assign regnumber regnumber+1
        %assign stackoffset stackoffset+1
    %endrep
%endmacro
; pop_xmm first, last
;   Reload xmm<first>..xmm<last> from consecutive 16-byte slots starting at
;   [rsp] (xmm<first> lowest), then release those slots by adding their total
;   size back to rsp.
;   Side effect: (re)assigns the preprocessor variables stackoffset and
;   regnumber.
%macro pop_xmm 2
    %assign regnumber %1
    %assign stackoffset 0
    %rep (%2 - %1 + 1)
        movdqu  xmm%[regnumber], [rsp + stackoffset * 16]
        %assign regnumber regnumber+1
        %assign stackoffset stackoffset+1
    %endrep
        add     rsp, (%2 - %1 + 1) * 16
%endmacro
%macro aes_hw_cpu 2
%define OPERATION %1
%define BLOCK_COUNT %2
@@ -145,8 +115,9 @@
%endmacro
%macro aes_hw_cpu_32_blocks 1
%define OPERATION_32_BLOCKS %1
%macro aes_hw_cpu_32_blocks 2
%define AES_HW_CPU_32_BLOCKS_NAME %1
%define OPERATION_32_BLOCKS %2
%ifidn __BITS__, 64
%define MAX_REG_BLOCK_COUNT 15
@@ -156,7 +127,29 @@
; Win64 output only: save xmm6 and above on the stack (the Microsoft x64
; convention treats xmm6-xmm15 as callee-saved).
%ifidn __OUTPUT_FORMAT__, win64
%if MAX_REG_BLOCK_COUNT > 5
push_xmm 6, MAX_REG_BLOCK_COUNT
; One 16-byte slot per register in xmm6..xmm<MAX_REG_BLOCK_COUNT>, plus 8 more
; bytes. NOTE(review): the extra 8 presumably brings rsp to a 16-byte boundary
; given the return address pushed by the caller -- confirm.
sub rsp, 16 * (MAX_REG_BLOCK_COUNT - 6 + 1) + 8
; Every "<name>_..._end" label below sits directly after the instruction it is
; named for. win64_aesni_32_unwind_info subtracts the function label from each
; of them to obtain the prolog offsets it stores in the unwind codes.
AES_HW_CPU_32_BLOCKS_NAME %+ _alloc_end:
; xmm6..xmm15 go to slots 0..9 (16 bytes each, slot 0 at [rsp]). The register
; list is written out explicitly, so it covers exactly xmm6..xmm15 regardless
; of the value of MAX_REG_BLOCK_COUNT.
movdqu [rsp + 16 * 0], xmm6
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm6_end:
movdqu [rsp + 16 * 1], xmm7
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm7_end:
movdqu [rsp + 16 * 2], xmm8
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm8_end:
movdqu [rsp + 16 * 3], xmm9
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm9_end:
movdqu [rsp + 16 * 4], xmm10
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm10_end:
movdqu [rsp + 16 * 5], xmm11
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm11_end:
movdqu [rsp + 16 * 6], xmm12
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm12_end:
movdqu [rsp + 16 * 7], xmm13
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm13_end:
movdqu [rsp + 16 * 8], xmm14
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm14_end:
movdqu [rsp + 16 * 9], xmm15
AES_HW_CPU_32_BLOCKS_NAME %+ _save_xmm15_end:
; Same address as _save_xmm15_end: the prolog ends with the last register save.
AES_HW_CPU_32_BLOCKS_NAME %+ _prolog_end:
%endif
%endif
@@ -174,15 +167,80 @@
; Win64 output only: restore the xmm registers saved on the stack and release
; the stack space that held them.
%ifidn __OUTPUT_FORMAT__, win64
%if MAX_REG_BLOCK_COUNT > 5
pop_xmm 6, MAX_REG_BLOCK_COUNT
; Reload xmm6..xmm15 from slots 0..9 (16 bytes each, slot 0 at [rsp]).
movdqu xmm6, [rsp + 16 * 0]
movdqu xmm7, [rsp + 16 * 1]
movdqu xmm8, [rsp + 16 * 2]
movdqu xmm9, [rsp + 16 * 3]
movdqu xmm10, [rsp + 16 * 4]
movdqu xmm11, [rsp + 16 * 5]
movdqu xmm12, [rsp + 16 * 6]
movdqu xmm13, [rsp + 16 * 7]
movdqu xmm14, [rsp + 16 * 8]
movdqu xmm15, [rsp + 16 * 9]
; Give back one 16-byte slot per register in xmm6..xmm<MAX_REG_BLOCK_COUNT>
; plus the 8 extra bytes.
add rsp, 16 * (MAX_REG_BLOCK_COUNT - 6 + 1) + 8
%endif
%endif
%undef OPERATION_32_BLOCKS
%undef AES_HW_CPU_32_BLOCKS_NAME
%undef MAX_REG_BLOCK_COUNT
%endmacro
; Win64 unwind metadata for the 32-block AES-NI routines.
;
; Usage: win64_aesni_32_unwind_info <function label>, <function end label>
;
; For win64 output this emits a .pdata entry (function start, function end and
; unwind-record addresses, all image-base relative) and a hand-encoded .xdata
; unwind record named <function label>_unwind_info, then switches back to
; section .text. For every other output format it emits nothing.
;
; The record is a mirror image of the prologue generated by
; aes_hw_cpu_32_blocks and reads that prologue's <function label>_..._end
; labels. The unwind codes are listed in descending prolog-offset order: the
; xmm15..xmm6 saves first, the "sub rsp" allocation last. That makes
; 10 SAVE_XMM128 entries + 1 ALLOC_LARGE entry, two slots each = 22 slots.
; Both the slot count and the recorded allocation size are fixed for the
; win64 / MAX_REG_BLOCK_COUNT == 15 layout. Whenever the saved-register range
; or the allocation changes, change the prologue and this table in the same
; edit; if they disagree the OS unwinder mis-restores the caller's context.
%macro win64_aesni_32_unwind_info 2
%ifidn __OUTPUT_FORMAT__, win64
    section .pdata rdata align=4
    align 4
    dd %1 wrt ..imagebase                       ; function start
    dd %2 wrt ..imagebase                       ; function end
    dd %1 %+ _unwind_info wrt ..imagebase       ; unwind record
    section .xdata rdata align=8
    align 4
%1 %+ _unwind_info:
    ; Header bytes: version 1 / no flags, prolog size, slot count, no frame
    ; register.
    db 1, %1 %+ _prolog_end - %1, 22, 0
    ; SAVE_XMM128 entries (operation code 8, register number in the high
    ; nibble). The following word is the save slot's rsp offset divided by 16.
    db %1 %+ _save_xmm15_end - %1, 8 | (15 << 4)
    dw (16 * 9) / 16
    db %1 %+ _save_xmm14_end - %1, 8 | (14 << 4)
    dw (16 * 8) / 16
    db %1 %+ _save_xmm13_end - %1, 8 | (13 << 4)
    dw (16 * 7) / 16
    db %1 %+ _save_xmm12_end - %1, 8 | (12 << 4)
    dw (16 * 6) / 16
    db %1 %+ _save_xmm11_end - %1, 8 | (11 << 4)
    dw (16 * 5) / 16
    db %1 %+ _save_xmm10_end - %1, 8 | (10 << 4)
    dw (16 * 4) / 16
    db %1 %+ _save_xmm9_end - %1, 8 | (9 << 4)
    dw (16 * 3) / 16
    db %1 %+ _save_xmm8_end - %1, 8 | (8 << 4)
    dw (16 * 2) / 16
    db %1 %+ _save_xmm7_end - %1, 8 | (7 << 4)
    dw (16 * 1) / 16
    db %1 %+ _save_xmm6_end - %1, 8 | (6 << 4)
    dw (16 * 0) / 16
    ; ALLOC_LARGE entry (operation code 1, info 0). The following word is the
    ; allocation size divided by 8.
    db %1 %+ _alloc_end - %1, 1 | (0 << 4)
    dw (16 * (15 - 6 + 1) + 8) / 8
    section .text
%endif
%endmacro
%ifidn __BITS__, 16
USE16
@@ -312,8 +370,10 @@
; void aes_hw_cpu_decrypt_32_blocks (const byte *ks, byte *data);
aes_function_entry aes_hw_cpu_decrypt_32_blocks
aes_hw_cpu_32_blocks dec
aes_hw_cpu_32_blocks aes_hw_cpu_decrypt_32_blocks, dec
aes_function_exit
aes_hw_cpu_decrypt_32_blocks_end:
win64_aesni_32_unwind_info aes_hw_cpu_decrypt_32_blocks, aes_hw_cpu_decrypt_32_blocks_end
; void aes_hw_cpu_encrypt (const byte *ks, byte *data);
@@ -326,8 +386,10 @@
; void aes_hw_cpu_encrypt_32_blocks (const byte *ks, byte *data);
aes_function_entry aes_hw_cpu_encrypt_32_blocks
aes_hw_cpu_32_blocks enc
aes_hw_cpu_32_blocks aes_hw_cpu_encrypt_32_blocks, enc
aes_function_exit
aes_hw_cpu_encrypt_32_blocks_end:
win64_aesni_32_unwind_info aes_hw_cpu_encrypt_32_blocks, aes_hw_cpu_encrypt_32_blocks_end
%endif ; __BITS__ != 16
+100 -39
View File
@@ -55,8 +55,8 @@
; The default convention is the one for windows; the gnu/linux convention is
; used instead if __GNUC__ is defined.
;
; Define _SEH_ to include support for Win64 structured exception handling
; (this requires YASM version 0.6 or later).
; Win64 unwind metadata is emitted explicitly in .pdata/.xdata when this file
; is assembled as a PE32+ object.
;
; This code provides the standard AES block size (128 bits, 16 bytes) and the
; three standard AES key sizes (128, 192 and 256 bits). It has the same call
@@ -673,6 +673,32 @@
%endif
; win64_aes_unwind_info <function label>, <function end label>
;
; For win64 output, emits the .pdata entry and the .xdata unwind record
; (<function label>_unwind_info) of one table-based AES routine, then returns
; to section .text. Emits nothing for other output formats.
;
; The record reads the local labels .save_rsi_end, .save_rdi_end,
; .save_rbx_end, .save_rbp_end, .save_r12_end, .alloc_end and .prolog_end
; under the function label, so the function's prologue must define them.
; Codes are listed from the last prologue step back to the first.
%macro win64_aes_unwind_info 2
%ifidn __OUTPUT_FORMAT__, win64
    section .pdata rdata align=4
    align 4
    dd %1 wrt ..imagebase                       ; function start
    dd %2 wrt ..imagebase                       ; function end
    dd %1 %+ _unwind_info wrt ..imagebase       ; unwind record
    section .xdata rdata align=8
    align 4
%1 %+ _unwind_info:
    ; Header bytes: version 1 / no flags, prolog size, 6 unwind code slots,
    ; no frame register.
    db 1, %1 %+ .prolog_end - %1, 6, 0
    ; One slot per entry: prolog offset, then operation code in the low nibble
    ; and operation info in the high nibble.
    db %1 %+ .alloc_end - %1,    2 | (0 << 4)   ; UWOP_ALLOC_SMALL, 8 bytes
    db %1 %+ .save_r12_end - %1, 0 | (12 << 4)  ; UWOP_PUSH_NONVOL r12
    db %1 %+ .save_rbp_end - %1, 0 | (5 << 4)   ; UWOP_PUSH_NONVOL rbp
    db %1 %+ .save_rbx_end - %1, 0 | (3 << 4)   ; UWOP_PUSH_NONVOL rbx
    db %1 %+ .save_rdi_end - %1, 0 | (7 << 4)   ; UWOP_PUSH_NONVOL rdi
    db %1 %+ .save_rsi_end - %1, 0 | (6 << 4)   ; UWOP_PUSH_NONVOL rsi
    section .text align=16
%endif
%endmacro
%ifdef ENCRYPTION
global aes_encrypt
@@ -691,19 +717,24 @@ enc_tab:
section .text align=16
align 16
%ifdef _SEH_
proc_frame aes_encrypt
alloc_stack 7*8 ; 7 to align stack to 16 bytes
save_reg rsi,4*8
save_reg rdi,5*8
save_reg rbx,1*8
save_reg rbp,2*8
save_reg r12,3*8
end_prologue
mov rdi, rcx ; input pointer
mov [rsp+0*8], rdx ; output pointer
%else
aes_encrypt:
%ifidn __OUTPUT_FORMAT__, win64
push rsi
.save_rsi_end:
push rdi
.save_rdi_end:
push rbx
.save_rbx_end:
push rbp
.save_rbp_end:
push r12
.save_r12_end:
sub rsp, 8
.alloc_end:
mov rdi, rcx ; input pointer
mov [rsp], rdx ; output pointer
.prolog_end:
%else
%ifdef __GNUC__
sub rsp, 4*8 ; gnu/linux binary interface
mov [rsp+0*8], rsi ; output pointer
@@ -766,25 +797,37 @@ end_prologue
mov [rbx+12], r12d
xor rax, rax
.4:
%ifidn __OUTPUT_FORMAT__, win64
add rsp, 8
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
ret
%else
%ifdef __GNUC__
mov rbx, [rsp+1*8]
mov rbp, [rsp+2*8]
mov r12, [rsp+3*8]
%ifdef __GNUC__
add rsp, 4*8
ret
%else
mov rbx, [rsp+1*8]
mov rbp, [rsp+2*8]
mov r12, [rsp+3*8]
mov rsi, [rsp+4*8]
mov rdi, [rsp+5*8]
%ifdef _SEH_
add rsp, 7*8
ret
endproc_frame
%else
add rsp, 6*8
ret
%endif
%endif
%ifidn __OUTPUT_FORMAT__, win64
aes_encrypt_end:
win64_aes_unwind_info aes_encrypt, aes_encrypt_end
%endif
%endif
%ifdef DECRYPTION
@@ -805,19 +848,24 @@ dec_tab:
section .text
align 16
%ifdef _SEH_
proc_frame aes_decrypt
alloc_stack 7*8 ; 7 to align stack to 16 bytes
save_reg rsi,4*8
save_reg rdi,5*8
save_reg rbx,1*8
save_reg rbp,2*8
save_reg r12,3*8
end_prologue
mov rdi, rcx ; input pointer
mov [rsp+0*8], rdx ; output pointer
%else
aes_decrypt:
%ifidn __OUTPUT_FORMAT__, win64
push rsi
.save_rsi_end:
push rdi
.save_rdi_end:
push rbx
.save_rbx_end:
push rbp
.save_rbp_end:
push r12
.save_r12_end:
sub rsp, 8
.alloc_end:
mov rdi, rcx ; input pointer
mov [rsp], rdx ; output pointer
.prolog_end:
%else
%ifdef __GNUC__
sub rsp, 4*8 ; gnu/linux binary interface
mov [rsp+0*8], rsi ; output pointer
@@ -885,25 +933,38 @@ end_prologue
mov [rbx+8], r11d
mov [rbx+12], r12d
xor rax, rax
.4: mov rbx, [rsp+1*8]
.4:
%ifidn __OUTPUT_FORMAT__, win64
add rsp, 8
pop r12
pop rbp
pop rbx
pop rdi
pop rsi
ret
%else
%ifdef __GNUC__
mov rbx, [rsp+1*8]
mov rbp, [rsp+2*8]
mov r12, [rsp+3*8]
%ifdef __GNUC__
add rsp, 4*8
ret
%else
mov rbx, [rsp+1*8]
mov rbp, [rsp+2*8]
mov r12, [rsp+3*8]
mov rsi, [rsp+4*8]
mov rdi, [rsp+5*8]
%ifdef _SEH_
add rsp, 7*8
ret
endproc_frame
%else
add rsp, 6*8
ret
%endif
%endif
%ifidn __OUTPUT_FORMAT__, win64
aes_decrypt_end:
win64_aes_unwind_info aes_decrypt, aes_decrypt_end
%endif
%endif
%ifidn __OUTPUT_FORMAT__,elf