mirror of
https://github.com/veracrypt/VeraCrypt.git
synced 2026-06-09 22:36:59 -05:00
Windows: speed up PRF autodetection mode by implementing an abort mechanism in PBKDF2/Argon2 primitives
This commit is contained in:
@@ -165,7 +165,9 @@ typedef enum Argon2_ErrorCodes {
|
||||
|
||||
ARGON2_DECODING_LENGTH_FAIL = -34,
|
||||
|
||||
ARGON2_VERIFY_MISMATCH = -35
|
||||
ARGON2_VERIFY_MISMATCH = -35,
|
||||
|
||||
ARGON2_OPERATION_CANCELLED = -36
|
||||
} argon2_error_codes;
|
||||
|
||||
/* Memory allocator types --- for external allocation */
|
||||
@@ -222,6 +224,9 @@ typedef struct Argon2_Context {
|
||||
|
||||
uint32_t version; /* version number */
|
||||
|
||||
/* Cancellation token for VeraCrypt */
|
||||
long volatile *pAbortKeyDerivation;
|
||||
|
||||
allocate_fptr allocate_cbk; /* pointer to memory allocator */
|
||||
deallocate_fptr free_cbk; /* pointer to memory deallocator */
|
||||
|
||||
@@ -275,20 +280,20 @@ ARGON2_PUBLIC int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen);
|
||||
const size_t hashlen, long volatile* pAbortKeyDerivation);
|
||||
|
||||
ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen);
|
||||
const size_t hashlen, long volatile* pAbortKeyDerivation);
|
||||
|
||||
ARGON2_PUBLIC int argon2id_hash_raw(const uint32_t t_cost,
|
||||
const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen);
|
||||
const size_t hashlen, long volatile *pAbortKeyDerivation);
|
||||
|
||||
/* generic function underlying the above ones */
|
||||
ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
@@ -296,7 +301,7 @@ ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash,
|
||||
const size_t hashlen, argon2_type type,
|
||||
const uint32_t version);
|
||||
const uint32_t version, long volatile *pAbortKeyDerivation);
|
||||
|
||||
/**
|
||||
* Argon2d: Version of Argon2 that picks memory blocks depending
|
||||
|
||||
@@ -24,6 +24,9 @@
|
||||
#include <memory.h>
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
#include "Crypto/config.h"
|
||||
#include "Crypto/cpu.h"
|
||||
#include "Crypto/misc.h"
|
||||
|
||||
const char *argon2_type2string(argon2_type type, int uppercase) {
|
||||
switch (type) {
|
||||
@@ -91,6 +94,9 @@ int argon2_ctx(argon2_context *context, argon2_type type) {
|
||||
result = fill_memory_blocks(&instance);
|
||||
|
||||
if (ARGON2_OK != result) {
|
||||
// If cancelled, we must still free the allocated memory!
|
||||
free_memory(context, (uint8_t *)instance.memory,
|
||||
instance.memory_blocks, sizeof(block));
|
||||
return result;
|
||||
}
|
||||
/* 5. Finalization */
|
||||
@@ -103,7 +109,7 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt, const size_t saltlen,
|
||||
void *hash, const size_t hashlen, argon2_type type,
|
||||
const uint32_t version){
|
||||
const uint32_t version, long volatile *pAbortKeyDerivation){
|
||||
|
||||
argon2_context context;
|
||||
int result;
|
||||
@@ -148,6 +154,7 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
context.free_cbk = NULL;
|
||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
context.version = version;
|
||||
context.pAbortKeyDerivation = pAbortKeyDerivation;
|
||||
|
||||
result = argon2_ctx(&context, type);
|
||||
|
||||
@@ -171,28 +178,28 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost,
|
||||
int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen) {
|
||||
const size_t saltlen, void *hash, const size_t hashlen, long volatile* pAbortKeyDerivation) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, Argon2_i, ARGON2_VERSION_NUMBER);
|
||||
hash, hashlen, Argon2_i, ARGON2_VERSION_NUMBER, pAbortKeyDerivation);
|
||||
}
|
||||
|
||||
int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen) {
|
||||
const size_t saltlen, void *hash, const size_t hashlen, long volatile* pAbortKeyDerivation) {
|
||||
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, Argon2_d, ARGON2_VERSION_NUMBER);
|
||||
hash, hashlen, Argon2_d, ARGON2_VERSION_NUMBER, pAbortKeyDerivation);
|
||||
}
|
||||
|
||||
int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost,
|
||||
const uint32_t parallelism, const void *pwd,
|
||||
const size_t pwdlen, const void *salt,
|
||||
const size_t saltlen, void *hash, const size_t hashlen) {
|
||||
const size_t saltlen, void *hash, const size_t hashlen, long volatile *pAbortKeyDerivation) {
|
||||
return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen,
|
||||
hash, hashlen, Argon2_id,
|
||||
ARGON2_VERSION_NUMBER);
|
||||
ARGON2_VERSION_NUMBER, pAbortKeyDerivation);
|
||||
}
|
||||
|
||||
int argon2d_ctx(argon2_context *context) {
|
||||
|
||||
@@ -140,6 +140,8 @@ static BLAKE2_INLINE void store48(void *dst, uint64_t w) {
|
||||
*p++ = (uint8_t)w;
|
||||
}
|
||||
|
||||
/* removed since they are defined in VeraCrypt headers */
|
||||
/*
|
||||
static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) {
|
||||
return (w >> c) | (w << (32 - c));
|
||||
}
|
||||
@@ -147,7 +149,7 @@ static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) {
|
||||
static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) {
|
||||
return (w >> c) | (w << (64 - c));
|
||||
}
|
||||
|
||||
*/
|
||||
void clear_internal_memory(void *v, size_t n);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -15,6 +15,9 @@
|
||||
* software. If not, they may be obtained at the above URLs.
|
||||
*/
|
||||
|
||||
#include "Crypto/config.h"
|
||||
#include "Crypto/cpu.h"
|
||||
#include "Crypto/misc.h"
|
||||
#include "blake2.h"
|
||||
#include "blake2-impl.h"
|
||||
|
||||
|
||||
@@ -20,13 +20,14 @@
|
||||
|
||||
#include "blake2-impl.h"
|
||||
|
||||
#include <emmintrin.h>
|
||||
/* removed to solve build errors under Windows Driver */
|
||||
//#include <emmintrin.h>
|
||||
#if defined(__SSSE3__)
|
||||
#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
|
||||
//#include <tmmintrin.h> /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */
|
||||
#endif
|
||||
|
||||
#if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__))
|
||||
#include <x86intrin.h>
|
||||
//#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX512F__)
|
||||
@@ -180,7 +181,7 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
||||
} while ((void)0, 0)
|
||||
#else /* __AVX2__ */
|
||||
|
||||
#include <immintrin.h>
|
||||
//#include <immintrin.h>
|
||||
|
||||
#define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1))
|
||||
#define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10))
|
||||
@@ -329,7 +330,7 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) {
|
||||
|
||||
#else /* __AVX512F__ */
|
||||
|
||||
#include <immintrin.h>
|
||||
//#include <immintrin.h>
|
||||
|
||||
#define ror64(x, n) _mm512_ror_epi64((x), (n))
|
||||
|
||||
|
||||
@@ -19,6 +19,13 @@
|
||||
|
||||
|
||||
#include "core.h"
|
||||
#include "Crypto/config.h"
|
||||
#if !defined(_UEFI)
|
||||
#include <memory.h>
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
#include "Crypto/cpu.h"
|
||||
#include "Crypto/misc.h"
|
||||
#include "blake2/blake2.h"
|
||||
#include "blake2/blake2-impl.h"
|
||||
|
||||
@@ -206,12 +213,16 @@ uint32_t index_alpha(const argon2_instance_t *instance,
|
||||
/* Single-threaded version for p=1 case */
|
||||
static int fill_memory_blocks_st(argon2_instance_t *instance) {
|
||||
uint32_t r, s, l;
|
||||
int result = ARGON2_OK;
|
||||
|
||||
for (r = 0; r < instance->passes; ++r) {
|
||||
for (s = 0; s < ARGON2_SYNC_POINTS; ++s) {
|
||||
for (l = 0; l < instance->lanes; ++l) {
|
||||
argon2_position_t position = {r, l, (uint8_t)s, 0};
|
||||
fill_segment(instance, position);
|
||||
result = fill_segment(instance, position);
|
||||
if (result != ARGON2_OK) {
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
#ifdef GENKAT
|
||||
|
||||
@@ -216,7 +216,7 @@ void finalize(const argon2_context *context, argon2_instance_t *instance);
|
||||
* @param position Current position
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
void fill_segment(const argon2_instance_t *instance,
|
||||
int fill_segment(const argon2_instance_t *instance,
|
||||
argon2_position_t position);
|
||||
|
||||
/*
|
||||
|
||||
+133
-104
@@ -358,6 +358,8 @@ fill_block:
|
||||
jne .L5
|
||||
jmp .L4
|
||||
|
||||
|
||||
align 16
|
||||
next_addresses:
|
||||
push rdi
|
||||
push rbx
|
||||
@@ -386,6 +388,8 @@ next_addresses:
|
||||
pop rdi
|
||||
ret
|
||||
|
||||
|
||||
align 16
|
||||
global fill_segment_avx2
|
||||
fill_segment_avx2:
|
||||
push r15
|
||||
@@ -403,7 +407,7 @@ fill_segment_avx2:
|
||||
vmovdqu [rsp+48], xmm1
|
||||
and r14, -32
|
||||
test rcx, rcx
|
||||
je .L44
|
||||
je .L37
|
||||
mov edx, dword [rcx+36]
|
||||
cmp edx, 1
|
||||
je .L18
|
||||
@@ -413,47 +417,54 @@ fill_segment_avx2:
|
||||
je .L19
|
||||
mov ebp, dword [rsp+52]
|
||||
test r12d, r12d
|
||||
jne .L37
|
||||
xor r12d, r12d
|
||||
test al, al
|
||||
sete r12b
|
||||
je .L51
|
||||
xor r15d, r15d
|
||||
add r12d, r12d
|
||||
xor r12d, r12d
|
||||
.L20:
|
||||
mov r8d, dword [rbx+24]
|
||||
mov r9d, dword [rbx+20]
|
||||
mov ecx, dword [rbx+24]
|
||||
mov r8d, dword [rbx+20]
|
||||
xor edx, edx
|
||||
mov ecx, 128
|
||||
mov rdi, r14
|
||||
imul ebp, r8d
|
||||
imul eax, r9d
|
||||
imul ebp, ecx
|
||||
imul eax, r8d
|
||||
add ebp, r12d
|
||||
add ebp, eax
|
||||
mov eax, ebp
|
||||
lea r13d, [rbp-1]
|
||||
div r8d
|
||||
lea eax, [rbp+r8-1]
|
||||
div ecx
|
||||
lea eax, [rbp+rcx-1]
|
||||
mov ecx, 128
|
||||
test edx, edx
|
||||
cmove r13d, eax
|
||||
lea rax, [rsp+64]
|
||||
mov qword [rsp+40], rax
|
||||
mov esi, r13d
|
||||
sal rsi, 10
|
||||
add rsi, qword [rbx]
|
||||
rep movsq
|
||||
cmp r12d, r9d
|
||||
jnb .L44
|
||||
lea rax, [rsp+1088]
|
||||
mov qword [rsp+32], rax
|
||||
lea rax, [rsp+64]
|
||||
mov qword [rsp+40], rax
|
||||
jmp .L35
|
||||
align 16
|
||||
align 8
|
||||
.L46:
|
||||
cmp r12d, r8d
|
||||
jb .L24
|
||||
jmp .L36
|
||||
|
||||
align 16
|
||||
align 8
|
||||
.L53:
|
||||
mov esi, r12d
|
||||
and esi, 127
|
||||
je .L52
|
||||
.L29:
|
||||
mov edx, dword [rsp+48]
|
||||
mov eax, esi
|
||||
mov ecx, dword [rsp+52]
|
||||
mov r8, qword [rsp+64+rax*8]
|
||||
test edx, edx
|
||||
jne .L31
|
||||
.L54:
|
||||
cmp byte [rsp+56], 0
|
||||
jne .L30
|
||||
mov rdi, rcx
|
||||
jne .L31
|
||||
mov rsi, rcx
|
||||
mov r9d, 1
|
||||
.L31:
|
||||
.L32:
|
||||
lea rdx, [rsp+48]
|
||||
mov rcx, rbx
|
||||
mov dword [rsp+60], r12d
|
||||
@@ -461,7 +472,7 @@ fill_segment_avx2:
|
||||
mov edx, dword [rbx+24]
|
||||
mov r8, qword [rbx]
|
||||
mov eax, eax
|
||||
imul rdx, rdi
|
||||
imul rdx, rsi
|
||||
add rdx, rax
|
||||
mov eax, ebp
|
||||
sal rdx, 10
|
||||
@@ -469,55 +480,80 @@ fill_segment_avx2:
|
||||
add rdx, r8
|
||||
add r8, rax
|
||||
cmp dword [rbx+8], 16
|
||||
je .L32
|
||||
je .L33
|
||||
mov eax, dword [rsp+48]
|
||||
test eax, eax
|
||||
je .L32
|
||||
je .L33
|
||||
mov r9d, 1
|
||||
mov rcx, r14
|
||||
call fill_block
|
||||
.L34:
|
||||
add r12d, 1
|
||||
cmp r12d, dword [rbx+20]
|
||||
jnb .L44
|
||||
mov r8d, dword [rbx+24]
|
||||
add ebp, 1
|
||||
.L35:
|
||||
call fill_block
|
||||
cmp r12d, dword [rbx+20]
|
||||
jnb .L36
|
||||
.L24:
|
||||
test r12b, 63
|
||||
jne .L25
|
||||
mov rax, qword [rbx+48]
|
||||
mov rax, qword [rax+96]
|
||||
test rax, rax
|
||||
je .L25
|
||||
mov eax, dword [rax]
|
||||
test eax, eax
|
||||
jne .L41
|
||||
.L25:
|
||||
mov eax, ebp
|
||||
xor edx, edx
|
||||
div r8d
|
||||
div dword [rbx+24]
|
||||
cmp edx, 1
|
||||
je .L25
|
||||
je .L26
|
||||
mov eax, r13d
|
||||
add r13d, 1
|
||||
.L26:
|
||||
.L27:
|
||||
test r15d, r15d
|
||||
je .L27
|
||||
mov edi, r12d
|
||||
and edi, 127
|
||||
je .L45
|
||||
.L28:
|
||||
mov eax, edi
|
||||
mov r8, qword [rsp+64+rax*8]
|
||||
.L29:
|
||||
jne .L53
|
||||
mov edx, dword [rsp+48]
|
||||
sal rax, 10
|
||||
add rax, qword [rbx]
|
||||
mov r8, qword [rax]
|
||||
mov ecx, dword [rsp+52]
|
||||
test edx, edx
|
||||
je .L46
|
||||
.L30:
|
||||
mov edi, dword [rbx+28]
|
||||
je .L54
|
||||
.L31:
|
||||
mov esi, dword [rbx+28]
|
||||
mov rax, r8
|
||||
xor edx, edx
|
||||
xor r9d, r9d
|
||||
shr rax, 32
|
||||
div rdi
|
||||
div rsi
|
||||
cmp rdx, rcx
|
||||
mov rdi, rdx
|
||||
mov rsi, rdx
|
||||
sete r9b
|
||||
jmp .L31
|
||||
align 16
|
||||
align 8
|
||||
.L44:
|
||||
jmp .L32
|
||||
|
||||
align 16
|
||||
align 8
|
||||
.L51:
|
||||
xor r12d, r12d
|
||||
test al, al
|
||||
sete r12b
|
||||
xor r15d, r15d
|
||||
add r12d, r12d
|
||||
jmp .L20
|
||||
|
||||
align 16
|
||||
align 8
|
||||
.L33:
|
||||
xor r9d, r9d
|
||||
mov rcx, r14
|
||||
add r12d, 1
|
||||
add ebp, 1
|
||||
call fill_block
|
||||
cmp r12d, dword [rbx+20]
|
||||
jb .L24
|
||||
.L36:
|
||||
xor eax, eax
|
||||
.L16:
|
||||
add rsp, 3160
|
||||
pop rbx
|
||||
pop rsi
|
||||
@@ -528,51 +564,35 @@ fill_segment_avx2:
|
||||
pop r14
|
||||
pop r15
|
||||
ret
|
||||
align 16
|
||||
align 8
|
||||
|
||||
align 16
|
||||
align 8
|
||||
.L26:
|
||||
lea eax, [rbp-1]
|
||||
mov r13d, ebp
|
||||
jmp .L27
|
||||
|
||||
align 16
|
||||
align 8
|
||||
.L52:
|
||||
mov rcx, qword [rsp+40]
|
||||
lea rdx, [rsp+1088]
|
||||
call next_addresses
|
||||
jmp .L29
|
||||
|
||||
align 16
|
||||
align 8
|
||||
.L19:
|
||||
test r12d, r12d
|
||||
jne .L47
|
||||
jne .L55
|
||||
cmp al, 1
|
||||
jbe .L18
|
||||
mov ebp, dword [rsp+52]
|
||||
xor r15d, r15d
|
||||
jmp .L20
|
||||
align 16
|
||||
align 8
|
||||
.L32:
|
||||
xor r9d, r9d
|
||||
mov rcx, r14
|
||||
call fill_block
|
||||
jmp .L34
|
||||
align 16
|
||||
align 8
|
||||
.L27:
|
||||
sal rax, 10
|
||||
add rax, qword [rbx]
|
||||
mov r8, qword [rax]
|
||||
jmp .L29
|
||||
align 16
|
||||
align 8
|
||||
.L25:
|
||||
lea eax, [rbp-1]
|
||||
mov r13d, ebp
|
||||
jmp .L26
|
||||
align 16
|
||||
align 8
|
||||
.L45:
|
||||
mov rdx, qword [rsp+32]
|
||||
mov rcx, qword [rsp+40]
|
||||
call next_addresses
|
||||
jmp .L28
|
||||
align 16
|
||||
align 8
|
||||
.L37:
|
||||
xor r15d, r15d
|
||||
xor r12d, r12d
|
||||
jmp .L20
|
||||
align 16
|
||||
align 8
|
||||
|
||||
align 16
|
||||
align 8
|
||||
.L18:
|
||||
xor edx, edx
|
||||
lea rcx, [rsp+1088]
|
||||
@@ -583,18 +603,18 @@ fill_segment_avx2:
|
||||
vpinsrd xmm0, xmm2, dword [rbx+36], 1
|
||||
movzx edx, byte [rsp+56]
|
||||
mov qword [rsp+1088], rax
|
||||
mov ecx, dword [rbx+16]
|
||||
mov edi, dword [rbx+16]
|
||||
mov r12, rax
|
||||
mov eax, dword [rsp+52]
|
||||
vpmovzxdq xmm0, xmm0
|
||||
mov qword [rsp+1104], rdx
|
||||
mov qword [rsp+1112], rcx
|
||||
mov qword [rsp+1112], rdi
|
||||
mov qword [rsp+1096], rax
|
||||
mov rbp, rax
|
||||
mov rax, rdx
|
||||
vmovdqu [rsp+1120], xmm0
|
||||
test r12d, r12d
|
||||
jne .L38
|
||||
jne .L39
|
||||
test dl, dl
|
||||
jne .L20
|
||||
lea rcx, [rsp+64]
|
||||
@@ -604,19 +624,28 @@ fill_segment_avx2:
|
||||
mov ebp, dword [rsp+52]
|
||||
movzx eax, byte [rsp+56]
|
||||
jmp .L20
|
||||
align 16
|
||||
align 8
|
||||
.L47:
|
||||
|
||||
align 16
|
||||
align 8
|
||||
.L41:
|
||||
mov eax, -36
|
||||
jmp .L16
|
||||
|
||||
.L55:
|
||||
mov ebp, dword [rsp+52]
|
||||
xor r15d, r15d
|
||||
xor r12d, r12d
|
||||
jmp .L20
|
||||
align 16
|
||||
align 8
|
||||
.L38:
|
||||
|
||||
.L39:
|
||||
xor r12d, r12d
|
||||
jmp .L20
|
||||
|
||||
.L37:
|
||||
mov eax, -25
|
||||
jmp .L16
|
||||
|
||||
|
||||
section .rdata align=32
|
||||
LC0:
|
||||
db 3,4,5,6,7,0,1,2,11,12,13,14,15,8,9,10
|
||||
@@ -626,8 +655,8 @@ LC1:
|
||||
db 2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9
|
||||
db 2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9
|
||||
|
||||
; External functions
|
||||
; External symbols
|
||||
extern index_alpha
|
||||
extern init_block_value
|
||||
|
||||
; end of file
|
||||
; End of file
|
||||
|
||||
@@ -20,6 +20,9 @@
|
||||
|
||||
#include "argon2.h"
|
||||
#include "core.h"
|
||||
#include "Crypto/config.h"
|
||||
#include "Crypto/cpu.h"
|
||||
#include "Crypto/misc.h"
|
||||
|
||||
#if defined(__AVX2__)
|
||||
|
||||
@@ -88,7 +91,7 @@ static void next_addresses(block *address_block, block *input_block) {
|
||||
fill_block(zero2_block, address_block, address_block, 0);
|
||||
}
|
||||
|
||||
void fill_segment_avx2(const argon2_instance_t *instance,
|
||||
int fill_segment_avx2(const argon2_instance_t *instance,
|
||||
argon2_position_t position) {
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
block address_block, input_block;
|
||||
@@ -99,7 +102,7 @@ void fill_segment_avx2(const argon2_instance_t *instance,
|
||||
int data_independent_addressing;
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
|
||||
data_independent_addressing =
|
||||
@@ -145,6 +148,12 @@ void fill_segment_avx2(const argon2_instance_t *instance,
|
||||
|
||||
for (i = starting_index; i < instance->segment_length;
|
||||
++i, ++curr_offset, ++prev_offset) {
|
||||
// Check every 64 blocks. This is a good balance for responsiveness.
|
||||
if ((i & 63) == 0 && instance->context_ptr->pAbortKeyDerivation &&
|
||||
*instance->context_ptr->pAbortKeyDerivation)
|
||||
{
|
||||
return ARGON2_OPERATION_CANCELLED; // Return cancellation code
|
||||
}
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % instance->lane_length == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
@@ -191,11 +200,13 @@ void fill_segment_avx2(const argon2_instance_t *instance,
|
||||
}
|
||||
}
|
||||
}
|
||||
return ARGON2_OK;
|
||||
}
|
||||
#else
|
||||
void fill_segment_avx2(const argon2_instance_t* instance,
|
||||
int fill_segment_avx2(const argon2_instance_t* instance,
|
||||
argon2_position_t position) {
|
||||
(void)instance;
|
||||
(void)position;
|
||||
return ARGON2_INCORRECT_PARAMETER; /* AVX2 not available */
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -19,12 +19,14 @@
|
||||
|
||||
#include "argon2.h"
|
||||
#include "core.h"
|
||||
#include "Crypto/config.h"
|
||||
#include "Crypto/cpu.h"
|
||||
#include "Crypto/misc.h"
|
||||
|
||||
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
|
||||
|
||||
#include "blake2/blake2.h"
|
||||
#include "blake2/blamka-round-opt.h"
|
||||
#include "Crypto/config.h"
|
||||
|
||||
/*
|
||||
* Function fills a new memory block and optionally XORs the old block over the new one.
|
||||
@@ -91,7 +93,7 @@ static void next_addresses(block *address_block, block *input_block) {
|
||||
fill_block(zero2_block, address_block, address_block, 0);
|
||||
}
|
||||
|
||||
void fill_segment_sse2(const argon2_instance_t *instance,
|
||||
int fill_segment_sse2(const argon2_instance_t *instance,
|
||||
argon2_position_t position) {
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
block address_block, input_block;
|
||||
@@ -102,7 +104,7 @@ void fill_segment_sse2(const argon2_instance_t *instance,
|
||||
int data_independent_addressing;
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
|
||||
data_independent_addressing =
|
||||
@@ -148,6 +150,13 @@ void fill_segment_sse2(const argon2_instance_t *instance,
|
||||
|
||||
for (i = starting_index; i < instance->segment_length;
|
||||
++i, ++curr_offset, ++prev_offset) {
|
||||
|
||||
// Check every 64 blocks. This is a good balance for responsiveness.
|
||||
if ((i & 63) == 0 && instance->context_ptr->pAbortKeyDerivation &&
|
||||
*instance->context_ptr->pAbortKeyDerivation)
|
||||
{
|
||||
return ARGON2_OPERATION_CANCELLED; // Return cancellation code
|
||||
}
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % instance->lane_length == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
@@ -194,11 +203,13 @@ void fill_segment_sse2(const argon2_instance_t *instance,
|
||||
}
|
||||
}
|
||||
}
|
||||
return ARGON2_OK;
|
||||
}
|
||||
#else
|
||||
void fill_segment_sse2(const argon2_instance_t* instance,
|
||||
int fill_segment_sse2(const argon2_instance_t* instance,
|
||||
argon2_position_t position) {
|
||||
(void)instance;
|
||||
(void)position;
|
||||
return ARGON2_INCORRECT_PARAMETER; // SSE2 not available
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -89,7 +89,7 @@ static void next_addresses(block *address_block, block *input_block,
|
||||
fill_block(zero_block, address_block, address_block, 0);
|
||||
}
|
||||
|
||||
void fill_segment_ref(const argon2_instance_t *instance,
|
||||
int fill_segment_ref(const argon2_instance_t *instance,
|
||||
argon2_position_t position) {
|
||||
block *ref_block = NULL, *curr_block = NULL;
|
||||
block address_block, input_block, zero_block;
|
||||
@@ -100,7 +100,7 @@ void fill_segment_ref(const argon2_instance_t *instance,
|
||||
int data_independent_addressing;
|
||||
|
||||
if (instance == NULL) {
|
||||
return;
|
||||
return ARGON2_INCORRECT_PARAMETER;
|
||||
}
|
||||
|
||||
data_independent_addressing =
|
||||
@@ -145,6 +145,11 @@ void fill_segment_ref(const argon2_instance_t *instance,
|
||||
|
||||
for (i = starting_index; i < instance->segment_length;
|
||||
++i, ++curr_offset, ++prev_offset) {
|
||||
if ((i & 63) == 0 && instance->context_ptr->pAbortKeyDerivation &&
|
||||
*instance->context_ptr->pAbortKeyDerivation)
|
||||
{
|
||||
return ARGON2_OPERATION_CANCELLED; // Return cancellation code
|
||||
}
|
||||
/*1.1 Rotating prev_offset if needed */
|
||||
if (curr_offset % instance->lane_length == 1) {
|
||||
prev_offset = curr_offset - 1;
|
||||
@@ -193,29 +198,30 @@ void fill_segment_ref(const argon2_instance_t *instance,
|
||||
}
|
||||
}
|
||||
}
|
||||
return ARGON2_OK;
|
||||
}
|
||||
|
||||
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
|
||||
extern void fill_segment_sse2(const argon2_instance_t* instance,
|
||||
extern int fill_segment_sse2(const argon2_instance_t* instance,
|
||||
argon2_position_t position);
|
||||
extern void fill_segment_avx2(const argon2_instance_t* instance,
|
||||
extern int fill_segment_avx2(const argon2_instance_t* instance,
|
||||
argon2_position_t position);
|
||||
#endif
|
||||
|
||||
void fill_segment(const argon2_instance_t* instance,
|
||||
int fill_segment(const argon2_instance_t* instance,
|
||||
argon2_position_t position) {
|
||||
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32
|
||||
if (HasSAVX2())
|
||||
{
|
||||
fill_segment_avx2(instance, position);
|
||||
return fill_segment_avx2(instance, position);
|
||||
}
|
||||
else if (HasSSE2())
|
||||
{
|
||||
fill_segment_sse2(instance, position);
|
||||
return fill_segment_sse2(instance, position);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
fill_segment_ref(instance, position);
|
||||
return fill_segment_ref(instance, position);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,6 +92,7 @@ static int argon2_selftest_ctx (void)
|
||||
context.free_cbk = NULL;
|
||||
context.flags = ARGON2_DEFAULT_FLAGS;
|
||||
context.version = ARGON2_VERSION_13;
|
||||
context.pAbortKeyDerivation = NULL; /* No abort function */
|
||||
|
||||
/* Test execution for Argon2d, Argon2i, Argon2id */
|
||||
|
||||
@@ -118,7 +119,7 @@ static int hashtest (uint32_t t, uint32_t m, uint32_t p, const char *pwd,
|
||||
uint8_t out[32];
|
||||
|
||||
return (argon2_hash(t, 1 << m, p, pwd, strlen (pwd), salt, strlen (salt),
|
||||
out, 32, type, ARGON2_VERSION_NUMBER) == ARGON2_OK &&
|
||||
out, 32, type, ARGON2_VERSION_NUMBER, NULL) == ARGON2_OK &&
|
||||
memcmp (out, ref, 32) == 0) ? 0 : 1;
|
||||
}
|
||||
|
||||
|
||||
@@ -143,6 +143,8 @@ extern __m128 _mm_shuffle_ps(__m128 _A, __m128 _B, unsigned int _Imm8);
|
||||
extern __m128i _mm_srli_si128(__m128i _A, int _Imm);
|
||||
extern __m128i _mm_slli_si128(__m128i _A, int _Imm);
|
||||
extern __m128i _mm_setzero_si128();
|
||||
extern __m128i _mm_mul_epu32(__m128i _A, __m128i _B);
|
||||
extern __m128i _mm_slli_epi64(__m128i _A, int _Count);
|
||||
#define _mm_xor_si64 _m_pxor
|
||||
#define _mm_empty _m_empty
|
||||
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | \
|
||||
|
||||
Reference in New Issue
Block a user