From 9dc24ba7d04204473a595b4466af5a7081062cfa Mon Sep 17 00:00:00 2001 From: Mounir IDRASSI Date: Sun, 29 Jun 2025 21:44:32 +0900 Subject: [PATCH] Windows: speedup PRF autodetection mode by implementing abort mechanism in PBKDF2/Argon2 primitives --- src/Common/Dlgcode.c | 12 +- src/Common/EncryptionThreadPool.c | 17 +- src/Common/EncryptionThreadPool.h | 2 +- src/Common/Pkcs5.c | 132 ++++++++-- src/Common/Pkcs5.h | 24 +- src/Common/Tests.c | 24 +- src/Common/Volumes.c | 69 +++-- src/Crypto/Argon2/include/argon2.h | 15 +- src/Crypto/Argon2/src/argon2.c | 21 +- src/Crypto/Argon2/src/blake2/blake2-impl.h | 4 +- src/Crypto/Argon2/src/blake2/blake2b.c | 3 + .../Argon2/src/blake2/blamka-round-opt.h | 11 +- src/Crypto/Argon2/src/core.c | 13 +- src/Crypto/Argon2/src/core.h | 2 +- src/Crypto/Argon2/src/opt_avx2.asm | 237 ++++++++++-------- src/Crypto/Argon2/src/opt_avx2.c | 17 +- src/Crypto/Argon2/src/opt_sse2.c | 19 +- src/Crypto/Argon2/src/ref.c | 22 +- src/Crypto/Argon2/src/selftest.c | 3 +- src/Crypto/cpu.h | 2 + src/Volume/Pkcs5Kdf.cpp | 16 +- 21 files changed, 437 insertions(+), 228 deletions(-) diff --git a/src/Common/Dlgcode.c b/src/Common/Dlgcode.c index cd3915ec..264d50c8 100644 --- a/src/Common/Dlgcode.c +++ b/src/Common/Dlgcode.c @@ -6445,32 +6445,32 @@ static BOOL PerformBenchmark(HWND hBenchDlg, HWND hwndDlg) case SHA512: /* PKCS-5 test with HMAC-SHA-512 used as the PRF */ - derive_key_sha512 ((const unsigned char*) "passphrase-1234567890", 21, (const unsigned char*) tmp_salt, 64, iterations, dk, MASTER_KEYDATA_SIZE); + derive_key_sha512 ((const unsigned char*) "passphrase-1234567890", 21, (const unsigned char*) tmp_salt, 64, iterations, dk, MASTER_KEYDATA_SIZE, NULL); break; case SHA256: /* PKCS-5 test with HMAC-SHA-256 used as the PRF */ - derive_key_sha256 ((const unsigned char*)"passphrase-1234567890", 21, (const unsigned char*)tmp_salt, 64, iterations, dk, MASTER_KEYDATA_SIZE); + derive_key_sha256 ((const unsigned char*)"passphrase-1234567890", 21, (const unsigned char*)tmp_salt, 64, iterations, dk, MASTER_KEYDATA_SIZE, NULL); break; #ifndef WOLFCRYPT_BACKEND case BLAKE2S: /* PKCS-5 test with HMAC-BLAKE2s used as the PRF */ - derive_key_blake2s ((const unsigned char*)"passphrase-1234567890", 21, (const unsigned char*)tmp_salt, 64, iterations, dk, MASTER_KEYDATA_SIZE); + derive_key_blake2s ((const unsigned char*)"passphrase-1234567890", 21, (const unsigned char*)tmp_salt, 64, iterations, dk, MASTER_KEYDATA_SIZE, NULL); break; case WHIRLPOOL: /* PKCS-5 test with HMAC-Whirlpool used as the PRF */ - derive_key_whirlpool ((const unsigned char*)"passphrase-1234567890", 21, (const unsigned char*)tmp_salt, 64, iterations, dk, MASTER_KEYDATA_SIZE); + derive_key_whirlpool ((const unsigned char*)"passphrase-1234567890", 21, (const unsigned char*)tmp_salt, 64, iterations, dk, MASTER_KEYDATA_SIZE, NULL); break; case STREEBOG: /* PKCS-5 test with HMAC-STREEBOG used as the PRF */ - derive_key_streebog((const unsigned char*)"passphrase-1234567890", 21, (const unsigned char*)tmp_salt, 64, iterations, dk, MASTER_KEYDATA_SIZE); + derive_key_streebog((const unsigned char*)"passphrase-1234567890", 21, (const unsigned char*)tmp_salt, 64, iterations, dk, MASTER_KEYDATA_SIZE, NULL); break; case ARGON2: /* test with ARGON2 used as the PRF */ - derive_key_argon2 ((const unsigned char*) "passphrase-1234567890", 21, (const unsigned char*)tmp_salt, 64, iterations, memoryCost, dk, MASTER_KEYDATA_SIZE); + derive_key_argon2 ((const unsigned char*) "passphrase-1234567890", 21, (const unsigned char*)tmp_salt, 64, iterations, memoryCost, dk, MASTER_KEYDATA_SIZE, NULL); break; } #endif diff --git a/src/Common/EncryptionThreadPool.c b/src/Common/EncryptionThreadPool.c index 41954b1d..e09cce30 100644 --- a/src/Common/EncryptionThreadPool.c +++ b/src/Common/EncryptionThreadPool.c @@ -107,7 +107,7 @@ typedef struct EncryptionThreadPoolWorkItemStruct int PasswordLength; int Pkcs5Prf; unsigned char *Salt; - + LONG volatile *pAbortKeyDerivation; } KeyDerivation; struct @@ -245,32 +245,32 @@ static TC_THREAD_PROC EncryptionThreadProc (void *threadArg) { case BLAKE2S: derive_key_blake2s (workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE, - workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize()); + workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize(), workItem->KeyDerivation.pAbortKeyDerivation); break; case SHA512: derive_key_sha512 (workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE, - workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize()); + workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize(), workItem->KeyDerivation.pAbortKeyDerivation); break; case WHIRLPOOL: derive_key_whirlpool (workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE, - workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize()); + workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize(), workItem->KeyDerivation.pAbortKeyDerivation); break; case SHA256: derive_key_sha256 (workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE, - workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize()); + workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize(), workItem->KeyDerivation.pAbortKeyDerivation); break; case STREEBOG: derive_key_streebog(workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE, - workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize()); + workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize(), workItem->KeyDerivation.pAbortKeyDerivation); break; case ARGON2: derive_key_argon2(workItem->KeyDerivation.Password, workItem->KeyDerivation.PasswordLength, workItem->KeyDerivation.Salt, PKCS5_SALT_SIZE, - workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.Memorycost, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize()); + workItem->KeyDerivation.IterationCount, workItem->KeyDerivation.Memorycost, workItem->KeyDerivation.DerivedKey, GetMaxPkcs5OutSize(), workItem->KeyDerivation.pAbortKeyDerivation); break; default: @@ -533,7 +533,7 @@ void EncryptionThreadPoolStop () } -void EncryptionThreadPoolBeginKeyDerivation (TC_EVENT *completionEvent, TC_EVENT *noOutstandingWorkItemEvent, LONG *completionFlag, LONG *outstandingWorkItemCount, int pkcs5Prf, unsigned char *password, int passwordLength, unsigned char *salt, int iterationCount, int memoryCost, unsigned char *derivedKey) +void EncryptionThreadPoolBeginKeyDerivation (TC_EVENT *completionEvent, TC_EVENT *noOutstandingWorkItemEvent, LONG *completionFlag, LONG *outstandingWorkItemCount, int pkcs5Prf, unsigned char *password, int passwordLength, unsigned char *salt, int iterationCount, int memoryCost, unsigned char *derivedKey, LONG volatile *pAbortKeyDerivation) { EncryptionThreadPoolWorkItem *workItem; @@ -563,6 +563,7 @@ void EncryptionThreadPoolBeginKeyDerivation (TC_EVENT *completionEvent, TC_EVENT workItem->KeyDerivation.PasswordLength = passwordLength; workItem->KeyDerivation.Pkcs5Prf = pkcs5Prf; workItem->KeyDerivation.Salt = salt; + workItem->KeyDerivation.pAbortKeyDerivation = pAbortKeyDerivation; InterlockedIncrement (outstandingWorkItemCount); TC_CLEAR_EVENT (*noOutstandingWorkItemEvent); diff --git a/src/Common/EncryptionThreadPool.h b/src/Common/EncryptionThreadPool.h index 1d2ce7e3..5063daf2 100644 --- a/src/Common/EncryptionThreadPool.h +++ b/src/Common/EncryptionThreadPool.h @@ -32,7 +32,7 @@ typedef enum size_t GetCpuCount (WORD* pGroupCount); #endif -void EncryptionThreadPoolBeginKeyDerivation (TC_EVENT *completionEvent, TC_EVENT *noOutstandingWorkItemEvent, LONG *completionFlag, LONG *outstandingWorkItemCount, int pkcs5Prf, unsigned char *password, int passwordLength, unsigned char *salt, int iterationCount, int memoryCost, unsigned char *derivedKey); +void EncryptionThreadPoolBeginKeyDerivation (TC_EVENT *completionEvent, TC_EVENT *noOutstandingWorkItemEvent, LONG *completionFlag, LONG *outstandingWorkItemCount, int pkcs5Prf, unsigned char *password, int passwordLength, unsigned char *salt, int iterationCount, int memoryCost, unsigned char *derivedKey, LONG volatile *pAbortKeyDerivation); void EncryptionThreadPoolBeginReadVolumeHeaderFinalization (TC_EVENT *keyDerivationCompletedEvent, TC_EVENT *noOutstandingWorkItemEvent, LONG* outstandingWorkItemCount, void* keyInfoBuffer, int keyInfoBufferSize, void* keyDerivationWorkItems, int keyDerivationWorkItemsSize); void EncryptionThreadPoolDoWork (EncryptionThreadPoolWorkType type, uint8 *data, const UINT64_STRUCT *startUnitNo, uint32 unitCount, PCRYPTO_INFO cryptoInfo); BOOL EncryptionThreadPoolStart (size_t encryptionFreeCpuCount); diff --git a/src/Common/Pkcs5.c b/src/Common/Pkcs5.c index 0f0ea8dc..51391574 100644 --- a/src/Common/Pkcs5.c +++ b/src/Common/Pkcs5.c @@ -145,7 +145,11 @@ void hmac_sha256 } #endif -static void derive_u_sha256 (const unsigned char *salt, int salt_len, uint32 iterations, int b, hmac_sha256_ctx* hmac) +static void derive_u_sha256 (const unsigned char *salt, int salt_len, uint32 iterations, int b, hmac_sha256_ctx* hmac +#ifndef TC_WINDOWS_BOOT + , long volatile *pAbortKeyDerivation +#endif +) { unsigned char* k = hmac->k; unsigned char* u = hmac->u; @@ -186,6 +190,11 @@ static void derive_u_sha256 (const unsigned char *salt, int salt_len, uint32 ite /* remaining iterations */ while (c > 1) { +#ifndef TC_WINDOWS_BOOT + // CANCELLATION CHECK: Check every 1024 iterations + if (pAbortKeyDerivation && (c & 1023) == 0 && *pAbortKeyDerivation == 1) + return; // Abort derivation +#endif hmac_sha256_internal (k, SHA256_DIGESTSIZE, hmac); for (i = 0; i < SHA256_DIGESTSIZE; i++) { @@ -196,7 +205,11 @@ static void derive_u_sha256 (const unsigned char *salt, int salt_len, uint32 ite } -void derive_key_sha256 (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen) +void derive_key_sha256 (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen +#ifndef TC_WINDOWS_BOOT + , long volatile *pAbortKeyDerivation +#endif +) { hmac_sha256_ctx hmac; sha256_ctx* ctx; @@ -264,20 +277,36 @@ void derive_key_sha256 (const unsigned char *pwd, int pwd_len, const unsigned ch /* first l - 1 blocks */ for (b = 1; b < l; b++) { +#ifndef TC_WINDOWS_BOOT + derive_u_sha256 (salt, salt_len, iterations, b, &hmac, pAbortKeyDerivation); + // Check if the derivation was aborted + if (pAbortKeyDerivation && *pAbortKeyDerivation == 1) + goto cancelled; +#else derive_u_sha256 (salt, salt_len, iterations, b, &hmac); +#endif memcpy (dk, hmac.u, SHA256_DIGESTSIZE); dk += SHA256_DIGESTSIZE; } /* last block */ +#ifndef TC_WINDOWS_BOOT + derive_u_sha256 (salt, salt_len, iterations, b, &hmac, pAbortKeyDerivation); + // Check if the derivation was aborted (in case of only one block) + if (pAbortKeyDerivation && *pAbortKeyDerivation == 1) + goto cancelled; +#else derive_u_sha256 (salt, salt_len, iterations, b, &hmac); +#endif memcpy (dk, hmac.u, r); #if defined (DEVICE_DRIVER) && !defined(_M_ARM64) if (NT_SUCCESS (saveStatus)) KeRestoreExtendedProcessorState(&SaveState); #endif - +#ifndef TC_WINDOWS_BOOT +cancelled: +#endif /* Prevent possible leaks. */ burn (&hmac, sizeof(hmac)); #ifndef TC_WINDOWS_BOOT @@ -395,7 +424,7 @@ void hmac_sha512 burn (key, sizeof(key)); } -static void derive_u_sha512 (const unsigned char *salt, int salt_len, uint32 iterations, int b, hmac_sha512_ctx* hmac) +static void derive_u_sha512 (const unsigned char *salt, int salt_len, uint32 iterations, int b, hmac_sha512_ctx* hmac, long volatile *pAbortKeyDerivation) { unsigned char* k = hmac->k; unsigned char* u = hmac->u; @@ -413,6 +442,9 @@ static void derive_u_sha512 (const unsigned char *salt, int salt_len, uint32 ite /* remaining iterations */ for (c = 1; c < iterations; c++) { + // CANCELLATION CHECK: Check every 1024 iterations + if (pAbortKeyDerivation && (c & 1023) == 0 && *pAbortKeyDerivation == 1) + return; // Abort derivation hmac_sha512_internal (k, SHA512_DIGESTSIZE, hmac); for (i = 0; i < SHA512_DIGESTSIZE; i++) { @@ -422,7 +454,7 @@ static void derive_u_sha512 (const unsigned char *salt, int salt_len, uint32 ite } -void derive_key_sha512 (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen) +void derive_key_sha512 (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen, long volatile *pAbortKeyDerivation) { hmac_sha512_ctx hmac; sha512_ctx* ctx; @@ -489,20 +521,26 @@ void derive_key_sha512 (const unsigned char *pwd, int pwd_len, const unsigned ch /* first l - 1 blocks */ for (b = 1; b < l; b++) { - derive_u_sha512 (salt, salt_len, iterations, b, &hmac); + derive_u_sha512 (salt, salt_len, iterations, b, &hmac, pAbortKeyDerivation); + // Check if the derivation was aborted + if (pAbortKeyDerivation && *pAbortKeyDerivation == 1) + goto cancelled; memcpy (dk, hmac.u, SHA512_DIGESTSIZE); dk += SHA512_DIGESTSIZE; } /* last block */ - derive_u_sha512 (salt, salt_len, iterations, b, &hmac); + derive_u_sha512 (salt, salt_len, iterations, b, &hmac, pAbortKeyDerivation); + // Check if the derivation was aborted (in case of only one block) + if (pAbortKeyDerivation && *pAbortKeyDerivation == 1) + goto cancelled; memcpy (dk, hmac.u, r); #if defined (DEVICE_DRIVER) && !defined(_M_ARM64) if (NT_SUCCESS (saveStatus)) KeRestoreExtendedProcessorState(&SaveState); #endif - +cancelled: /* Prevent possible leaks. */ burn (&hmac, sizeof(hmac)); burn (key, sizeof(key)); @@ -619,7 +657,11 @@ void hmac_blake2s } #endif -static void derive_u_blake2s (const unsigned char *salt, int salt_len, uint32 iterations, int b, hmac_blake2s_ctx* hmac) +static void derive_u_blake2s (const unsigned char *salt, int salt_len, uint32 iterations, int b, hmac_blake2s_ctx* hmac +#ifndef TC_WINDOWS_BOOT + , volatile long *pAbortKeyDerivation +#endif +) { unsigned char* k = hmac->k; unsigned char* u = hmac->u; @@ -660,6 +702,11 @@ static void derive_u_blake2s (const unsigned char *salt, int salt_len, uint32 it /* remaining iterations */ while (c > 1) { +#ifndef TC_WINDOWS_BOOT + // CANCELLATION CHECK: Check every 1024 iterations + if (pAbortKeyDerivation && (c & 1023) == 0 && *pAbortKeyDerivation) + return; // Abort derivation +#endif hmac_blake2s_internal (k, BLAKE2S_DIGESTSIZE, hmac); for (i = 0; i < BLAKE2S_DIGESTSIZE; i++) { @@ -670,7 +717,11 @@ static void derive_u_blake2s (const unsigned char *salt, int salt_len, uint32 it } -void derive_key_blake2s (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen) +void derive_key_blake2s (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen +#ifndef TC_WINDOWS_BOOT + , volatile long *pAbortKeyDerivation +#endif +) { hmac_blake2s_ctx hmac; blake2s_state* ctx; @@ -738,20 +789,36 @@ void derive_key_blake2s (const unsigned char *pwd, int pwd_len, const unsigned c /* first l - 1 blocks */ for (b = 1; b < l; b++) { +#ifndef TC_WINDOWS_BOOT + derive_u_blake2s (salt, salt_len, iterations, b, &hmac, pAbortKeyDerivation); + // Check if the derivation was aborted + if (pAbortKeyDerivation && *pAbortKeyDerivation) + goto cancelled; +#else derive_u_blake2s (salt, salt_len, iterations, b, &hmac); +#endif memcpy (dk, hmac.u, BLAKE2S_DIGESTSIZE); dk += BLAKE2S_DIGESTSIZE; } /* last block */ +#ifndef TC_WINDOWS_BOOT + derive_u_blake2s (salt, salt_len, iterations, b, &hmac, pAbortKeyDerivation); + // Check if the derivation was aborted (in case of only one block) + if (pAbortKeyDerivation && *pAbortKeyDerivation) + goto cancelled; +#else derive_u_blake2s (salt, salt_len, iterations, b, &hmac); +#endif memcpy (dk, hmac.u, r); #if defined (DEVICE_DRIVER) && !defined(_M_ARM64) if (NT_SUCCESS (saveStatus)) KeRestoreExtendedProcessorState(&SaveState); #endif - +#ifndef TC_WINDOWS_BOOT +cancelled: +#endif /* Prevent possible leaks. */ burn (&hmac, sizeof(hmac)); #ifndef TC_WINDOWS_BOOT @@ -856,7 +923,7 @@ void hmac_whirlpool burn(&hmac, sizeof(hmac)); } -static void derive_u_whirlpool (const unsigned char *salt, int salt_len, uint32 iterations, int b, hmac_whirlpool_ctx* hmac) +static void derive_u_whirlpool (const unsigned char *salt, int salt_len, uint32 iterations, int b, hmac_whirlpool_ctx* hmac, volatile long *pAbortKeyDerivation) { unsigned char* u = hmac->u; unsigned char* k = hmac->k; @@ -874,6 +941,9 @@ static void derive_u_whirlpool (const unsigned char *salt, int salt_len, uint32 /* remaining iterations */ for (c = 1; c < iterations; c++) { + // CANCELLATION CHECK: Check every 1024 iterations + if (pAbortKeyDerivation && (c & 1023) == 0 && *pAbortKeyDerivation) + return; // Abort derivation hmac_whirlpool_internal (k, WHIRLPOOL_DIGESTSIZE, hmac); for (i = 0; i < WHIRLPOOL_DIGESTSIZE; i++) { @@ -882,7 +952,7 @@ static void derive_u_whirlpool (const unsigned char *salt, int salt_len, uint32 } } -void derive_key_whirlpool (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen) +void derive_key_whirlpool (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen, volatile long *pAbortKeyDerivation) { hmac_whirlpool_ctx hmac; WHIRLPOOL_CTX* ctx; @@ -942,15 +1012,21 @@ void derive_key_whirlpool (const unsigned char *pwd, int pwd_len, const unsigned /* first l - 1 blocks */ for (b = 1; b < l; b++) { - derive_u_whirlpool (salt, salt_len, iterations, b, &hmac); + derive_u_whirlpool (salt, salt_len, iterations, b, &hmac, pAbortKeyDerivation); + // Check if the derivation was aborted + if (pAbortKeyDerivation && *pAbortKeyDerivation) + goto cancelled; memcpy (dk, hmac.u, WHIRLPOOL_DIGESTSIZE); dk += WHIRLPOOL_DIGESTSIZE; } /* last block */ - derive_u_whirlpool (salt, salt_len, iterations, b, &hmac); + derive_u_whirlpool (salt, salt_len, iterations, b, &hmac, pAbortKeyDerivation); + // Check if the derivation was aborted (in case of only one block) + if (pAbortKeyDerivation && *pAbortKeyDerivation) + goto cancelled; memcpy (dk, hmac.u, r); - +cancelled: /* Prevent possible leaks. */ burn (&hmac, sizeof(hmac)); burn (key, sizeof(key)); @@ -1050,7 +1126,7 @@ void hmac_streebog burn(&hmac, sizeof(hmac)); } -static void derive_u_streebog (const unsigned char *salt, int salt_len, uint32 iterations, int b, hmac_streebog_ctx* hmac) +static void derive_u_streebog (const unsigned char *salt, int salt_len, uint32 iterations, int b, hmac_streebog_ctx* hmac, volatile long *pAbortKeyDerivation) { unsigned char* u = hmac->u; unsigned char* k = hmac->k; @@ -1068,6 +1144,9 @@ static void derive_u_streebog (const unsigned char *salt, int salt_len, uint32 i /* remaining iterations */ for (c = 1; c < iterations; c++) { + // CANCELLATION CHECK: Check every 1024 iterations + if (pAbortKeyDerivation && (c & 1023) == 0 && *pAbortKeyDerivation) + return; // Abort derivation hmac_streebog_internal (k, STREEBOG_DIGESTSIZE, hmac); for (i = 0; i < STREEBOG_DIGESTSIZE; i++) { @@ -1076,7 +1155,7 @@ static void derive_u_streebog (const unsigned char *salt, int salt_len, uint32 i } } -void derive_key_streebog (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen) +void derive_key_streebog (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen, volatile long *pAbortKeyDerivation) { hmac_streebog_ctx hmac; STREEBOG_CTX* ctx; @@ -1136,15 +1215,21 @@ void derive_key_streebog (const unsigned char *pwd, int pwd_len, const unsigned /* first l - 1 blocks */ for (b = 1; b < l; b++) { - derive_u_streebog (salt, salt_len, iterations, b, &hmac); + derive_u_streebog (salt, salt_len, iterations, b, &hmac, pAbortKeyDerivation); + // Check if the derivation was aborted + if (pAbortKeyDerivation && *pAbortKeyDerivation) + goto cancelled; memcpy (dk, hmac.u, STREEBOG_DIGESTSIZE); dk += STREEBOG_DIGESTSIZE; } /* last block */ - derive_u_streebog (salt, salt_len, iterations, b, &hmac); + derive_u_streebog (salt, salt_len, iterations, b, &hmac, pAbortKeyDerivation); + // Check if the derivation was aborted (in case of only one block) + if (pAbortKeyDerivation && *pAbortKeyDerivation) + goto cancelled; memcpy (dk, hmac.u, r); - +cancelled: /* Prevent possible leaks. */ burn (&hmac, sizeof(hmac)); burn (key, sizeof(key)); @@ -1245,7 +1330,7 @@ int is_pkcs5_prf_supported (int pkcs5_prf_id, PRF_BOOT_TYPE bootType) } -void derive_key_argon2(const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, uint32 memcost, unsigned char *dk, int dklen) +void derive_key_argon2(const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, uint32 memcost, unsigned char *dk, int dklen, volatile long *pAbortKeyDerivation) { #if defined (DEVICE_DRIVER) && !defined(_M_ARM64) NTSTATUS saveStatus = STATUS_INVALID_PARAMETER; @@ -1259,7 +1344,8 @@ void derive_key_argon2(const unsigned char *pwd, int pwd_len, const unsigned cha 1, // parallelism factor (number of threads) pwd, pwd_len, // password and its length salt, salt_len, // salt and its length - dk, dklen// derived key and its length + dk, dklen,// derived key and its length + pAbortKeyDerivation )) { // If the Argon2 derivation fails, we fill the derived key with zeroes diff --git a/src/Common/Pkcs5.h b/src/Common/Pkcs5.h index 1c6454f9..e18f4431 100644 --- a/src/Common/Pkcs5.h +++ b/src/Common/Pkcs5.h @@ -20,30 +20,31 @@ extern "C" { #endif + +#ifndef TC_WINDOWS_BOOT /* output written to input_digest which must be at lease 32 bytes long */ void hmac_blake2s (unsigned char *key, int keylen, unsigned char *input_digest, int len); -void derive_key_blake2s (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen); +void derive_key_blake2s (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen, long volatile *pAbortKeyDerivation); /* output written to d which must be at lease 32 bytes long */ void hmac_sha256 (unsigned char *k, int lk, unsigned char *d, int ld); -void derive_key_sha256 (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen); +void derive_key_sha256 (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen, long volatile *pAbortKeyDerivation); -#ifndef TC_WINDOWS_BOOT /* output written to d which must be at lease 64 bytes long */ void hmac_sha512 (unsigned char *k, int lk, unsigned char *d, int ld); -void derive_key_sha512 (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen); +void derive_key_sha512 (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen, long volatile *pAbortKeyDerivation); /* output written to d which must be at lease 64 bytes long */ void hmac_whirlpool (unsigned char *k, int lk, unsigned char *d, int ld); -void derive_key_whirlpool (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen); +void derive_key_whirlpool (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen, long volatile *pAbortKeyDerivation); void hmac_streebog (unsigned char *k, int lk, unsigned char *d, int ld); -void derive_key_streebog (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen); +void derive_key_streebog (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen, long volatile *pAbortKeyDerivation); int get_pkcs5_iteration_count (int pkcs5_prf_id, int pim, BOOL bBoot, int* pMemoryCost); wchar_t *get_pkcs5_prf_name (int pkcs5_prf_id); -void derive_key_argon2(const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, uint32 memcost, unsigned char *dk, int dklen); +void derive_key_argon2(const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, uint32 memcost, unsigned char *dk, int dklen, long volatile *pAbortKeyDerivation); void get_argon2_params(int pim, int* pIterations, int* pMemcost); /* check if given PRF supported.*/ @@ -55,6 +56,15 @@ typedef enum } PRF_BOOT_TYPE; int is_pkcs5_prf_supported (int pkcs5_prf_id, PRF_BOOT_TYPE bootType); +#else // TC_WINDOWS_BOOT +/* output written to input_digest which must be at lease 32 bytes long */ +void hmac_blake2s (unsigned char *key, int keylen, unsigned char *input_digest, int len); +void derive_key_blake2s (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen); + +/* output written to d which must be at lease 32 bytes long */ +void hmac_sha256 (unsigned char *k, int lk, unsigned char *d, int ld); +void derive_key_sha256 (const unsigned char *pwd, int pwd_len, const unsigned char *salt, int salt_len, uint32 iterations, unsigned char *dk, int dklen); + #endif #if defined(__cplusplus) diff --git a/src/Common/Tests.c b/src/Common/Tests.c index 7d81e5bc..2ac8d8ff 100644 --- a/src/Common/Tests.c +++ b/src/Common/Tests.c @@ -1681,64 +1681,64 @@ BOOL test_pkcs5 () return FALSE; #endif /* PKCS-5 test 1 with HMAC-SHA-256 used as the PRF (https://tools.ietf.org/html/draft-josefsson-scrypt-kdf-00) */ - derive_key_sha256 ((unsigned char*) "passwd", 6, (unsigned char*) "\x73\x61\x6C\x74", 4, 1, dk, 64); + derive_key_sha256 ((unsigned char*) "passwd", 6, (unsigned char*) "\x73\x61\x6C\x74", 4, 1, dk, 64, NULL); if (memcmp (dk, "\x55\xac\x04\x6e\x56\xe3\x08\x9f\xec\x16\x91\xc2\x25\x44\xb6\x05\xf9\x41\x85\x21\x6d\xde\x04\x65\xe6\x8b\x9d\x57\xc2\x0d\xac\xbc\x49\xca\x9c\xcc\xf1\x79\xb6\x45\x99\x16\x64\xb3\x9d\x77\xef\x31\x7c\x71\xb8\x45\xb1\xe3\x0b\xd5\x09\x11\x20\x41\xd3\xa1\x97\x83", 64) != 0) return FALSE; /* PKCS-5 test 2 with HMAC-SHA-256 used as the PRF (https://stackoverflow.com/questions/5130513/pbkdf2-hmac-sha2-test-vectors) */ - derive_key_sha256 ((unsigned char*) "password", 8, (unsigned char*) "\x73\x61\x6C\x74", 4, 2, dk, 32); + derive_key_sha256 ((unsigned char*) "password", 8, (unsigned char*) "\x73\x61\x6C\x74", 4, 2, dk, 32, NULL); if (memcmp (dk, "\xae\x4d\x0c\x95\xaf\x6b\x46\xd3\x2d\x0a\xdf\xf9\x28\xf0\x6d\xd0\x2a\x30\x3f\x8e\xf3\xc2\x51\xdf\xd6\xe2\xd8\x5a\x95\x47\x4c\x43", 32) != 0) return FALSE; /* PKCS-5 test 3 with HMAC-SHA-256 used as the PRF (MS CryptoAPI) */ - derive_key_sha256 ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 4); + derive_key_sha256 ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 4, NULL); if (memcmp (dk, "\xf2\xa0\x4f\xb2", 4) != 0) return FALSE; /* PKCS-5 test 4 with HMAC-SHA-256 used as the PRF (MS CryptoAPI) */ - derive_key_sha256 ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 144); + derive_key_sha256 ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 144, NULL); if (memcmp (dk, "\xf2\xa0\x4f\xb2\xd3\xe9\xa5\xd8\x51\x0b\x5c\x06\xdf\x70\x8e\x24\xe9\xc7\xd9\x15\x3d\x22\xcd\xde\xb8\xa6\xdb\xfd\x71\x85\xc6\x99\x32\xc0\xee\x37\x27\xf7\x24\xcf\xea\xa6\xac\x73\xa1\x4c\x4e\x52\x9b\x94\xf3\x54\x06\xfc\x04\x65\xa1\x0a\x24\xfe\xf0\x98\x1d\xa6\x22\x28\xeb\x24\x55\x74\xce\x6a\x3a\x28\xe2\x04\x3a\x59\x13\xec\x3f\xf2\xdb\xcf\x58\xdd\x53\xd9\xf9\x17\xf6\xda\x74\x06\x3c\x0b\x66\xf5\x0f\xf5\x58\xa3\x27\x52\x8c\x5b\x07\x91\xd0\x81\xeb\xb6\xbc\x30\x69\x42\x71\xf2\xd7\x18\x42\xbe\xe8\x02\x93\x70\x66\xad\x35\x65\xbc\xf7\x96\x8e\x64\xf1\xc6\x92\xda\xe0\xdc\x1f\xb5\xf4", 144) != 0) return FALSE; /* PKCS-5 test 1 with HMAC-SHA-512 used as the PRF */ - derive_key_sha512 ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 4); + derive_key_sha512 ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 4, NULL); if (memcmp (dk, "\x13\x64\xae\xf8", 4) != 0) return FALSE; /* PKCS-5 test 2 with HMAC-SHA-512 used as the PRF (derives a key longer than the underlying hash output size and block size) */ - derive_key_sha512 ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 144); + derive_key_sha512 ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 144, NULL); if (memcmp (dk, "\x13\x64\xae\xf8\x0d\xf5\x57\x6c\x30\xd5\x71\x4c\xa7\x75\x3f\xfd\x00\xe5\x25\x8b\x39\xc7\x44\x7f\xce\x23\x3d\x08\x75\xe0\x2f\x48\xd6\x30\xd7\x00\xb6\x24\xdb\xe0\x5a\xd7\x47\xef\x52\xca\xa6\x34\x83\x47\xe5\xcb\xe9\x87\xf1\x20\x59\x6a\xe6\xa9\xcf\x51\x78\xc6\xb6\x23\xa6\x74\x0d\xe8\x91\xbe\x1a\xd0\x28\xcc\xce\x16\x98\x9a\xbe\xfb\xdc\x78\xc9\xe1\x7d\x72\x67\xce\xe1\x61\x56\x5f\x96\x68\xe6\xe1\xdd\xf4\xbf\x1b\x80\xe0\x19\x1c\xf4\xc4\xd3\xdd\xd5\xd5\x57\x2d\x83\xc7\xa3\x37\x87\xf4\x4e\xe0\xf6\xd8\x6d\x65\xdc\xa0\x52\xa3\x13\xbe\x81\xfc\x30\xbe\x7d\x69\x58\x34\xb6\xdd\x41\xc6", 144) != 0) return FALSE; #ifndef WOLFCRYPT_BACKEND /* PKCS-5 test 1 with HMAC-BLAKE2s used as the PRF */ - derive_key_blake2s ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 4); + derive_key_blake2s ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 4, NULL); if (memcmp (dk, "\x8d\x51\xfa\x31", 4) != 0) return FALSE; /* PKCS-5 test 2 with HMAC-BLAKE2s used as the PRF (derives a key longer than the underlying hash) */ - derive_key_blake2s ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 48); + derive_key_blake2s ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 48, NULL); if (memcmp (dk, "\x8d\x51\xfa\x31\x46\x25\x37\x67\xa3\x29\x6b\x3c\x6b\xc1\x5d\xb2\xee\xe1\x6c\x28\x00\x26\xea\x08\x65\x9c\x12\xf1\x07\xde\x0d\xb9\x9b\x4f\x39\xfa\xc6\x80\x26\xb1\x8f\x8e\x48\x89\x85\x2d\x24\x2d", 48) != 0) return FALSE; /* PKCS-5 test 1 with HMAC-Whirlpool used as the PRF */ - derive_key_whirlpool ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 4); + derive_key_whirlpool ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 4, NULL); if (memcmp (dk, "\x50\x7c\x36\x6f", 4) != 0) return FALSE; /* PKCS-5 test 2 with HMAC-Whirlpool used as the PRF (derives a key longer than the underlying hash) */ - derive_key_whirlpool ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 96); + derive_key_whirlpool ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 96, NULL); if (memcmp (dk, "\x50\x7c\x36\x6f\xee\x10\x2e\x9a\xe2\x8a\xd5\x82\x72\x7d\x27\x0f\xe8\x4d\x7f\x68\x7a\xcf\xb5\xe7\x43\x67\xaa\x98\x93\x52\x2b\x09\x6e\x42\xdf\x2c\x59\x4a\x91\x6d\x7e\x10\xae\xb2\x1a\x89\x8f\xb9\x8f\xe6\x31\xa9\xd8\x9f\x98\x26\xf4\xda\xcd\x7d\x65\x65\xde\x10\x95\x91\xb4\x84\x26\xae\x43\xa1\x00\x5b\x1e\xb8\x38\x97\xa4\x1e\x4b\xd2\x65\x64\xbc\xfa\x1f\x35\x85\xdb\x4f\x97\x65\x6f\xbd\x24", 96) != 0) return FALSE; /* PKCS-5 test 1 with HMAC-STREEBOG used as the PRF */ - derive_key_streebog ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 4); + derive_key_streebog ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 4, NULL); if (memcmp (dk, "\xd0\x53\xa2\x30", 4) != 0) return FALSE; /* PKCS-5 test 2 with HMAC-STREEBOG used as the PRF (derives a key longer than the underlying hash) */ - derive_key_streebog ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 96); + derive_key_streebog ((unsigned char*)"password", 8, (unsigned char*)"\x12\x34\x56\x78", 4, 5, dk, 96, NULL); if (memcmp (dk, "\xd0\x53\xa2\x30\x6f\x45\x81\xeb\xbc\x06\x81\xc5\xe7\x53\xa8\x5d\xc7\xf1\x23\x33\x1e\xbe\x64\x2c\x3b\x0f\x26\xd7\x00\xe1\x95\xc9\x65\x26\xb1\x85\xbe\x1e\xe2\xf4\x9b\xfc\x6b\x14\x84\xda\x24\x61\xa0\x1b\x9e\x79\x5c\xee\x69\x6e\xf9\x25\xb1\x1d\xca\xa0\x31\xba\x02\x6f\x9e\x99\x0f\xdb\x25\x01\x5b\xf1\xc7\x10\x19\x53\x3b\x29\x3f\x18\x00\xd6\xfc\x85\x03\xdc\xf2\xe5\xe9\x5a\xb1\x1e\x61\xde", 96) != 0) return FALSE; #endif diff --git a/src/Common/Volumes.c b/src/Common/Volumes.c index 1530f6e7..2b098d8f 100644 --- a/src/Common/Volumes.c +++ b/src/Common/Volumes.c @@ -194,6 +194,7 @@ int ReadVolumeHeader (BOOL bBoot, unsigned char *encryptedHeader, Password *pass int i; int iterationsCount = 0; int memoryCost = 0; + LONG volatile abortKeyDerivation = 0; #endif size_t queuedWorkItems = 0; @@ -331,7 +332,7 @@ int ReadVolumeHeader (BOOL bBoot, unsigned char *encryptedHeader, Password *pass iterationsCount = get_pkcs5_iteration_count (enqPkcs5Prf, pim, bBoot, &memoryCost); EncryptionThreadPoolBeginKeyDerivation (keyDerivationCompletedEvent, noOutstandingWorkItemEvent, &item->KeyReady, outstandingWorkItemCount, enqPkcs5Prf, keyInfo->userKey, - keyInfo->keyLength, keyInfo->salt, iterationsCount, memoryCost, item->DerivedKey); + keyInfo->keyLength, keyInfo->salt, iterationsCount, memoryCost, item->DerivedKey, &abortKeyDerivation); ++queuedWorkItems; break; @@ -382,35 +383,35 @@ KeyReady: ; { case SHA512: derive_key_sha512 (keyInfo->userKey, keyInfo->keyLength, keyInfo->salt, - PKCS5_SALT_SIZE, keyInfo->noIterations, dk, GetMaxPkcs5OutSize()); + PKCS5_SALT_SIZE, keyInfo->noIterations, dk, GetMaxPkcs5OutSize(), &abortKeyDerivation); break; case SHA256: derive_key_sha256 (keyInfo->userKey, keyInfo->keyLength, keyInfo->salt, - PKCS5_SALT_SIZE, keyInfo->noIterations, dk, GetMaxPkcs5OutSize()); + PKCS5_SALT_SIZE, keyInfo->noIterations, dk, GetMaxPkcs5OutSize(), &abortKeyDerivation); break; #ifndef WOLFCRYPT_BACKEND case BLAKE2S: derive_key_blake2s (keyInfo->userKey, keyInfo->keyLength, keyInfo->salt, - PKCS5_SALT_SIZE, keyInfo->noIterations, dk, GetMaxPkcs5OutSize()); + PKCS5_SALT_SIZE, keyInfo->noIterations, dk, GetMaxPkcs5OutSize(), &abortKeyDerivation); break; case WHIRLPOOL: derive_key_whirlpool (keyInfo->userKey, keyInfo->keyLength, keyInfo->salt, - PKCS5_SALT_SIZE, keyInfo->noIterations, dk, GetMaxPkcs5OutSize()); + PKCS5_SALT_SIZE, keyInfo->noIterations, dk, GetMaxPkcs5OutSize(), &abortKeyDerivation); break; case STREEBOG: derive_key_streebog(keyInfo->userKey, keyInfo->keyLength, keyInfo->salt, - PKCS5_SALT_SIZE, keyInfo->noIterations, dk, GetMaxPkcs5OutSize()); + PKCS5_SALT_SIZE, keyInfo->noIterations, dk, GetMaxPkcs5OutSize(), &abortKeyDerivation); break; case ARGON2: derive_key_argon2(keyInfo->userKey, keyInfo->keyLength, keyInfo->salt, - PKCS5_SALT_SIZE, keyInfo->noIterations, keyInfo->memoryCost, dk, GetMaxPkcs5OutSize()); + PKCS5_SALT_SIZE, keyInfo->noIterations, keyInfo->memoryCost, dk, GetMaxPkcs5OutSize(), &abortKeyDerivation); break; #endif default: @@ -615,6 +616,12 @@ KeyReady: ; } status = ERR_SUCCESS; + + if ((selected_pkcs5_prf == 0) && (encryptionThreadCount > 1)) + { + // Signal other threads to stop + InterlockedExchange(&abortKeyDerivation, 1); + } goto ret; } } @@ -622,6 +629,8 @@ KeyReady: ; status = ERR_PASSWORD_WRONG; err: + // Signal threads to stop + InterlockedExchange(&abortKeyDerivation, 1); if (cryptoInfo != retHeaderCryptoInfo) { crypto_close(cryptoInfo); @@ -640,19 +649,33 @@ ret: #if !defined(_UEFI) if ((selected_pkcs5_prf == 0) && (encryptionThreadCount > 1)) { - EncryptionThreadPoolBeginReadVolumeHeaderFinalization (keyDerivationCompletedEvent, noOutstandingWorkItemEvent, outstandingWorkItemCount, - keyInfoBuffer, keyInfoBufferSize, - keyDerivationWorkItems, keyDerivationWorkItemsSize); - } - else + // Wait for all outstanding threads to finish or cancel + TC_WAIT_EVENT(*noOutstandingWorkItemEvent); + // Cleanup is now synchronous because we already waited for all threads to stop. + // The asynchronous finalization is no longer needed. +#if !defined(DEVICE_DRIVER) + CloseHandle(*keyDerivationCompletedEvent); + CloseHandle(*noOutstandingWorkItemEvent); #endif - { - burn (keyInfo, sizeof (KEY_INFO)); -#if !defined(DEVICE_DRIVER) && !defined(_UEFI) - VirtualUnlock (keyInfoBuffer, keyInfoBufferSize); + TCfree(keyDerivationCompletedEvent); + TCfree(noOutstandingWorkItemEvent); + TCfree(outstandingWorkItemCount); + if (keyDerivationWorkItems) + { + burn(keyDerivationWorkItems, keyDerivationWorkItemsSize); +#if !defined(DEVICE_DRIVER) + VirtualUnlock(keyDerivationWorkItems, keyDerivationWorkItemsSize); #endif - TCfree(keyInfoBuffer); + TCfree(keyDerivationWorkItems); + } } +#endif + + burn (keyInfo, sizeof (KEY_INFO)); +#if !defined(DEVICE_DRIVER) + VirtualUnlock (keyInfoBuffer, keyInfoBufferSize); +#endif + TCfree(keyInfoBuffer); return status; } @@ -1018,33 +1041,33 @@ int CreateVolumeHeaderInMemory (HWND hwndDlg, BOOL bBoot, unsigned char *header, { case SHA512: derive_key_sha512 (keyInfo.userKey, keyInfo.keyLength, keyInfo.salt, - PKCS5_SALT_SIZE, keyInfo.noIterations, dk, GetMaxPkcs5OutSize()); + PKCS5_SALT_SIZE, keyInfo.noIterations, dk, GetMaxPkcs5OutSize(), NULL); break; case SHA256: derive_key_sha256 (keyInfo.userKey, keyInfo.keyLength, keyInfo.salt, - PKCS5_SALT_SIZE, keyInfo.noIterations, dk, GetMaxPkcs5OutSize()); + PKCS5_SALT_SIZE, keyInfo.noIterations, dk, GetMaxPkcs5OutSize(), NULL); break; #ifndef WOLFCRYPT_BACKEND case BLAKE2S: derive_key_blake2s (keyInfo.userKey, keyInfo.keyLength, keyInfo.salt, - PKCS5_SALT_SIZE, keyInfo.noIterations, dk, GetMaxPkcs5OutSize()); + PKCS5_SALT_SIZE, keyInfo.noIterations, dk, GetMaxPkcs5OutSize(), NULL); break; case WHIRLPOOL: derive_key_whirlpool (keyInfo.userKey, keyInfo.keyLength, keyInfo.salt, - PKCS5_SALT_SIZE, keyInfo.noIterations, dk, GetMaxPkcs5OutSize()); + PKCS5_SALT_SIZE, keyInfo.noIterations, dk, GetMaxPkcs5OutSize(), NULL); break; case STREEBOG: derive_key_streebog(keyInfo.userKey, keyInfo.keyLength, keyInfo.salt, - PKCS5_SALT_SIZE, keyInfo.noIterations, dk, GetMaxPkcs5OutSize()); + PKCS5_SALT_SIZE, keyInfo.noIterations, dk, GetMaxPkcs5OutSize(), NULL); break; case ARGON2: derive_key_argon2(keyInfo.userKey, keyInfo.keyLength, keyInfo.salt, - PKCS5_SALT_SIZE, keyInfo.noIterations, keyInfo.memoryCost, dk, GetMaxPkcs5OutSize()); + PKCS5_SALT_SIZE, keyInfo.noIterations, keyInfo.memoryCost, dk, GetMaxPkcs5OutSize(), NULL); break; #endif default: diff --git a/src/Crypto/Argon2/include/argon2.h b/src/Crypto/Argon2/include/argon2.h index 4a812299..49239967 100644 --- a/src/Crypto/Argon2/include/argon2.h +++ b/src/Crypto/Argon2/include/argon2.h @@ -165,7 +165,9 @@ typedef enum Argon2_ErrorCodes { ARGON2_DECODING_LENGTH_FAIL = -34, - ARGON2_VERIFY_MISMATCH = -35 + ARGON2_VERIFY_MISMATCH = -35, + + ARGON2_OPERATION_CANCELLED = -36 } argon2_error_codes; /* Memory allocator types --- for external allocation */ @@ -222,6 +224,9 @@ typedef struct Argon2_Context { uint32_t version; /* version number */ + /* Cancellation token for VeraCrypt */ + long volatile *pAbortKeyDerivation; + allocate_fptr allocate_cbk; /* pointer to memory allocator */ deallocate_fptr free_cbk; /* pointer to memory deallocator */ @@ -275,20 +280,20 @@ ARGON2_PUBLIC int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost, const uint32_t parallelism, const void *pwd, const size_t pwdlen, const void *salt, const size_t saltlen, void *hash, - const size_t hashlen); + const size_t hashlen, long volatile* pAbortKeyDerivation); ARGON2_PUBLIC int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost, const uint32_t parallelism, const void *pwd, const size_t pwdlen, const void *salt, const size_t saltlen, void *hash, - const size_t hashlen); + const size_t hashlen, long volatile* pAbortKeyDerivation); ARGON2_PUBLIC int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost, const uint32_t parallelism, const void *pwd, const size_t pwdlen, const void *salt, const size_t saltlen, void *hash, - const size_t hashlen); + const size_t hashlen, long volatile *pAbortKeyDerivation); /* generic function underlying the above ones */ ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, @@ -296,7 +301,7 @@ ARGON2_PUBLIC int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, const size_t pwdlen, const void *salt, const size_t saltlen, void *hash, const size_t hashlen, argon2_type type, - const uint32_t version); + const uint32_t version, long volatile *pAbortKeyDerivation); /** * Argon2d: Version of Argon2 that picks memory blocks depending diff --git a/src/Crypto/Argon2/src/argon2.c b/src/Crypto/Argon2/src/argon2.c index d5c682b5..e3b2b177 100644 --- a/src/Crypto/Argon2/src/argon2.c +++ b/src/Crypto/Argon2/src/argon2.c @@ -24,6 +24,9 @@ #include #include #endif +#include "Crypto/config.h" +#include "Crypto/cpu.h" +#include "Crypto/misc.h" const char *argon2_type2string(argon2_type type, int uppercase) { switch (type) { @@ -91,6 +94,9 @@ int argon2_ctx(argon2_context *context, argon2_type type) { result = fill_memory_blocks(&instance); if (ARGON2_OK != result) { + // If cancelled, we must still free the allocated memory! + free_memory(context, (uint8_t *)instance.memory, + instance.memory_blocks, sizeof(block)); return result; } /* 5. Finalization */ @@ -103,7 +109,7 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, const uint32_t parallelism, const void *pwd, const size_t pwdlen, const void *salt, const size_t saltlen, void *hash, const size_t hashlen, argon2_type type, - const uint32_t version){ + const uint32_t version, long volatile *pAbortKeyDerivation){ argon2_context context; int result; @@ -148,6 +154,7 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, context.free_cbk = NULL; context.flags = ARGON2_DEFAULT_FLAGS; context.version = version; + context.pAbortKeyDerivation = pAbortKeyDerivation; result = argon2_ctx(&context, type); @@ -171,28 +178,28 @@ int argon2_hash(const uint32_t t_cost, const uint32_t m_cost, int argon2i_hash_raw(const uint32_t t_cost, const uint32_t m_cost, const uint32_t parallelism, const void *pwd, const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, const size_t hashlen) { + const size_t saltlen, void *hash, const size_t hashlen, long volatile* pAbortKeyDerivation) { return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - hash, hashlen, Argon2_i, ARGON2_VERSION_NUMBER); + hash, hashlen, Argon2_i, ARGON2_VERSION_NUMBER, pAbortKeyDerivation); } int argon2d_hash_raw(const uint32_t t_cost, const uint32_t m_cost, const uint32_t parallelism, const void *pwd, const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, const size_t hashlen) { + const size_t saltlen, void *hash, const size_t hashlen, long volatile* pAbortKeyDerivation) { return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, - hash, hashlen, Argon2_d, ARGON2_VERSION_NUMBER); + hash, hashlen, Argon2_d, ARGON2_VERSION_NUMBER, pAbortKeyDerivation); } int argon2id_hash_raw(const uint32_t t_cost, const uint32_t m_cost, const uint32_t parallelism, const void *pwd, const size_t pwdlen, const void *salt, - const size_t saltlen, void *hash, const size_t hashlen) { + const size_t saltlen, void *hash, const size_t hashlen, long volatile *pAbortKeyDerivation) { return argon2_hash(t_cost, m_cost, parallelism, pwd, pwdlen, salt, saltlen, hash, hashlen, Argon2_id, - ARGON2_VERSION_NUMBER); + ARGON2_VERSION_NUMBER, pAbortKeyDerivation); } int argon2d_ctx(argon2_context *context) { diff --git a/src/Crypto/Argon2/src/blake2/blake2-impl.h b/src/Crypto/Argon2/src/blake2/blake2-impl.h index 2537e0ef..753e704b 100644 --- a/src/Crypto/Argon2/src/blake2/blake2-impl.h +++ b/src/Crypto/Argon2/src/blake2/blake2-impl.h @@ -140,6 +140,8 @@ static BLAKE2_INLINE void store48(void *dst, uint64_t w) { *p++ = (uint8_t)w; } +/* removed since they are defined in VeraCrypt headers */ +/* static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) { return (w >> c) | (w << (32 - c)); } @@ -147,7 +149,7 @@ static BLAKE2_INLINE uint32_t rotr32(const uint32_t w, const unsigned c) { static BLAKE2_INLINE uint64_t rotr64(const uint64_t w, const unsigned c) { return (w >> c) | (w << (64 - c)); } - +*/ void clear_internal_memory(void *v, size_t n); #endif diff --git a/src/Crypto/Argon2/src/blake2/blake2b.c b/src/Crypto/Argon2/src/blake2/blake2b.c index fefaf4a0..00133d1b 100644 --- a/src/Crypto/Argon2/src/blake2/blake2b.c +++ b/src/Crypto/Argon2/src/blake2/blake2b.c @@ -15,6 +15,9 @@ * software. If not, they may be obtained at the above URLs. */ +#include "Crypto/config.h" +#include "Crypto/cpu.h" +#include "Crypto/misc.h" #include "blake2.h" #include "blake2-impl.h" diff --git a/src/Crypto/Argon2/src/blake2/blamka-round-opt.h b/src/Crypto/Argon2/src/blake2/blamka-round-opt.h index 3127f2a3..ab6bce5e 100644 --- a/src/Crypto/Argon2/src/blake2/blamka-round-opt.h +++ b/src/Crypto/Argon2/src/blake2/blamka-round-opt.h @@ -20,13 +20,14 @@ #include "blake2-impl.h" -#include +/* remove to solve build errors under Windows Driver since */ +//#include #if defined(__SSSE3__) -#include /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */ +//#include /* for _mm_shuffle_epi8 and _mm_alignr_epi8 */ #endif #if defined(__XOP__) && (defined(__GNUC__) || defined(__clang__)) -#include +//#include #endif #if !defined(__AVX512F__) @@ -180,7 +181,7 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) { } while ((void)0, 0) #else /* __AVX2__ */ -#include +//#include #define rotr32(x) _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1)) #define rotr24(x) _mm256_shuffle_epi8(x, _mm256_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)) @@ -329,7 +330,7 @@ static BLAKE2_INLINE __m128i fBlaMka(__m128i x, __m128i y) { #else /* __AVX512F__ */ -#include +//#include #define ror64(x, n) _mm512_ror_epi64((x), (n)) diff --git a/src/Crypto/Argon2/src/core.c b/src/Crypto/Argon2/src/core.c index 7ae76173..1e67cce6 100644 --- a/src/Crypto/Argon2/src/core.c +++ b/src/Crypto/Argon2/src/core.c @@ -19,6 +19,13 @@ #include "core.h" +#include "Crypto/config.h" +#if !defined(_UEFI) +#include +#include +#endif +#include "Crypto/cpu.h" +#include "Crypto/misc.h" #include "blake2/blake2.h" #include "blake2/blake2-impl.h" @@ -206,12 +213,16 @@ uint32_t index_alpha(const argon2_instance_t *instance, /* Single-threaded version for p=1 case */ static int fill_memory_blocks_st(argon2_instance_t *instance) { uint32_t r, s, l; + int result = ARGON2_OK; for (r = 0; r < instance->passes; ++r) { for (s = 0; s < ARGON2_SYNC_POINTS; ++s) { for (l = 0; l < instance->lanes; ++l) { argon2_position_t position = {r, l, (uint8_t)s, 0}; - fill_segment(instance, position); + result = fill_segment(instance, position); + if (result != ARGON2_OK) { + return result; + } } } #ifdef GENKAT diff --git a/src/Crypto/Argon2/src/core.h b/src/Crypto/Argon2/src/core.h index 1e0fa0b5..4c14a5cb 100644 --- a/src/Crypto/Argon2/src/core.h +++ b/src/Crypto/Argon2/src/core.h @@ -216,7 +216,7 @@ void finalize(const argon2_context *context, argon2_instance_t *instance); * @param position Current position * @pre all block pointers must be valid */ -void fill_segment(const argon2_instance_t *instance, +int fill_segment(const argon2_instance_t *instance, argon2_position_t position); /* diff --git a/src/Crypto/Argon2/src/opt_avx2.asm b/src/Crypto/Argon2/src/opt_avx2.asm index 087a2d6f..a95d2d86 100644 --- a/src/Crypto/Argon2/src/opt_avx2.asm +++ b/src/Crypto/Argon2/src/opt_avx2.asm @@ -358,6 +358,8 @@ fill_block: jne .L5 jmp .L4 + +align 16 next_addresses: push rdi push rbx @@ -386,6 +388,8 @@ next_addresses: pop rdi ret + +align 16 global fill_segment_avx2 fill_segment_avx2: push r15 @@ -403,7 +407,7 @@ fill_segment_avx2: vmovdqu [rsp+48], xmm1 and r14, -32 test rcx, rcx - je .L44 + je .L37 mov edx, dword [rcx+36] cmp edx, 1 je .L18 @@ -413,47 +417,54 @@ fill_segment_avx2: je .L19 mov ebp, dword [rsp+52] test r12d, r12d - jne .L37 - xor r12d, r12d - test al, al - sete r12b + je .L51 xor r15d, r15d - add r12d, r12d + xor r12d, r12d .L20: - mov r8d, dword [rbx+24] - mov r9d, dword [rbx+20] + mov ecx, dword [rbx+24] + mov r8d, dword [rbx+20] xor edx, edx - mov ecx, 128 mov rdi, r14 - imul ebp, r8d - imul eax, r9d + imul ebp, ecx + imul eax, r8d add ebp, r12d add ebp, eax mov eax, ebp lea r13d, [rbp-1] - div r8d - lea eax, [rbp+r8-1] + div ecx + lea eax, [rbp+rcx-1] + mov ecx, 128 test edx, edx cmove r13d, eax + lea rax, [rsp+64] + mov qword [rsp+40], rax mov esi, r13d sal rsi, 10 add rsi, qword [rbx] rep movsq - cmp r12d, r9d - jnb .L44 - lea rax, [rsp+1088] - mov qword [rsp+32], rax - lea rax, [rsp+64] - mov qword [rsp+40], rax - jmp .L35 - align 16 - align 8 -.L46: + cmp r12d, r8d + jb .L24 + jmp .L36 + +align 16 +align 8 +.L53: + mov esi, r12d + and esi, 127 + je .L52 +.L29: + mov edx, dword [rsp+48] + mov eax, esi + mov ecx, dword [rsp+52] + mov r8, qword [rsp+64+rax*8] + test edx, edx + jne .L31 +.L54: cmp byte [rsp+56], 0 - jne .L30 - mov rdi, rcx + jne .L31 + mov rsi, rcx mov r9d, 1 -.L31: +.L32: lea rdx, [rsp+48] mov rcx, rbx mov dword [rsp+60], r12d @@ -461,7 +472,7 @@ fill_segment_avx2: mov edx, dword [rbx+24] mov r8, qword [rbx] mov eax, eax - imul rdx, rdi + imul rdx, rsi add rdx, rax mov eax, ebp sal rdx, 10 @@ -469,55 +480,80 @@ fill_segment_avx2: add rdx, r8 add r8, rax cmp dword [rbx+8], 16 - je .L32 + je .L33 mov eax, dword [rsp+48] test eax, eax - je .L32 + je .L33 mov r9d, 1 mov rcx, r14 - call fill_block -.L34: add r12d, 1 - cmp r12d, dword [rbx+20] - jnb .L44 - mov r8d, dword [rbx+24] add ebp, 1 -.L35: + call fill_block + cmp r12d, dword [rbx+20] + jnb .L36 +.L24: + test r12b, 63 + jne .L25 + mov rax, qword [rbx+48] + mov rax, qword [rax+96] + test rax, rax + je .L25 + mov eax, dword [rax] + test eax, eax + jne .L41 +.L25: mov eax, ebp xor edx, edx - div r8d + div dword [rbx+24] cmp edx, 1 - je .L25 + je .L26 mov eax, r13d add r13d, 1 -.L26: +.L27: test r15d, r15d - je .L27 - mov edi, r12d - and edi, 127 - je .L45 -.L28: - mov eax, edi - mov r8, qword [rsp+64+rax*8] -.L29: + jne .L53 mov edx, dword [rsp+48] + sal rax, 10 + add rax, qword [rbx] + mov r8, qword [rax] mov ecx, dword [rsp+52] test edx, edx - je .L46 -.L30: - mov edi, dword [rbx+28] + je .L54 +.L31: + mov esi, dword [rbx+28] mov rax, r8 xor edx, edx xor r9d, r9d shr rax, 32 - div rdi + div rsi cmp rdx, rcx - mov rdi, rdx + mov rsi, rdx sete r9b - jmp .L31 - align 16 - align 8 -.L44: + jmp .L32 + +align 16 +align 8 +.L51: + xor r12d, r12d + test al, al + sete r12b + xor r15d, r15d + add r12d, r12d + jmp .L20 + +align 16 +align 8 +.L33: + xor r9d, r9d + mov rcx, r14 + add r12d, 1 + add ebp, 1 + call fill_block + cmp r12d, dword [rbx+20] + jb .L24 +.L36: + xor eax, eax +.L16: add rsp, 3160 pop rbx pop rsi @@ -528,51 +564,35 @@ fill_segment_avx2: pop r14 pop r15 ret - align 16 - align 8 + +align 16 +align 8 +.L26: + lea eax, [rbp-1] + mov r13d, ebp + jmp .L27 + +align 16 +align 8 +.L52: + mov rcx, qword [rsp+40] + lea rdx, [rsp+1088] + call next_addresses + jmp .L29 + +align 16 +align 8 .L19: test r12d, r12d - jne .L47 + jne .L55 cmp al, 1 jbe .L18 mov ebp, dword [rsp+52] xor r15d, r15d jmp .L20 - align 16 - align 8 -.L32: - xor r9d, r9d - mov rcx, r14 - call fill_block - jmp .L34 - align 16 - align 8 -.L27: - sal rax, 10 - add rax, qword [rbx] - mov r8, qword [rax] - jmp .L29 - align 16 - align 8 -.L25: - lea eax, [rbp-1] - mov r13d, ebp - jmp .L26 - align 16 - align 8 -.L45: - mov rdx, qword [rsp+32] - mov rcx, qword [rsp+40] - call next_addresses - jmp .L28 - align 16 - align 8 -.L37: - xor r15d, r15d - xor r12d, r12d - jmp .L20 - align 16 - align 8 + +align 16 +align 8 .L18: xor edx, edx lea rcx, [rsp+1088] @@ -583,18 +603,18 @@ fill_segment_avx2: vpinsrd xmm0, xmm2, dword [rbx+36], 1 movzx edx, byte [rsp+56] mov qword [rsp+1088], rax - mov ecx, dword [rbx+16] + mov edi, dword [rbx+16] mov r12, rax mov eax, dword [rsp+52] vpmovzxdq xmm0, xmm0 mov qword [rsp+1104], rdx - mov qword [rsp+1112], rcx + mov qword [rsp+1112], rdi mov qword [rsp+1096], rax mov rbp, rax mov rax, rdx vmovdqu [rsp+1120], xmm0 test r12d, r12d - jne .L38 + jne .L39 test dl, dl jne .L20 lea rcx, [rsp+64] @@ -604,19 +624,28 @@ fill_segment_avx2: mov ebp, dword [rsp+52] movzx eax, byte [rsp+56] jmp .L20 - align 16 - align 8 -.L47: + +align 16 +align 8 +.L41: + mov eax, -36 + jmp .L16 + +.L55: mov ebp, dword [rsp+52] xor r15d, r15d xor r12d, r12d jmp .L20 - align 16 - align 8 -.L38: + +.L39: xor r12d, r12d jmp .L20 +.L37: + mov eax, -25 + jmp .L16 + + section .rdata align=32 LC0: db 3,4,5,6,7,0,1,2,11,12,13,14,15,8,9,10 @@ -626,8 +655,8 @@ LC1: db 2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9 db 2,3,4,5,6,7,0,1,10,11,12,13,14,15,8,9 -; External functions +; External symbols extern index_alpha extern init_block_value -; end of file +; End of file diff --git a/src/Crypto/Argon2/src/opt_avx2.c b/src/Crypto/Argon2/src/opt_avx2.c index 6d326c5a..f9436d3b 100644 --- a/src/Crypto/Argon2/src/opt_avx2.c +++ b/src/Crypto/Argon2/src/opt_avx2.c @@ -20,6 +20,9 @@ #include "argon2.h" #include "core.h" +#include "Crypto/config.h" +#include "Crypto/cpu.h" +#include "Crypto/misc.h" #if defined(__AVX2__) @@ -88,7 +91,7 @@ static void next_addresses(block *address_block, block *input_block) { fill_block(zero2_block, address_block, address_block, 0); } -void fill_segment_avx2(const argon2_instance_t *instance, +int fill_segment_avx2(const argon2_instance_t *instance, argon2_position_t position) { block *ref_block = NULL, *curr_block = NULL; block address_block, input_block; @@ -99,7 +102,7 @@ void fill_segment_avx2(const argon2_instance_t *instance, int data_independent_addressing; if (instance == NULL) { - return; + return ARGON2_INCORRECT_PARAMETER; } data_independent_addressing = @@ -145,6 +148,12 @@ void fill_segment_avx2(const argon2_instance_t *instance, for (i = starting_index; i < instance->segment_length; ++i, ++curr_offset, ++prev_offset) { + // Check every 64 blocks. This is a good balance for responsiveness. + if ((i & 63) == 0 && instance->context_ptr->pAbortKeyDerivation && + *instance->context_ptr->pAbortKeyDerivation) + { + return ARGON2_OPERATION_CANCELLED; // Return cancellation code + } /*1.1 Rotating prev_offset if needed */ if (curr_offset % instance->lane_length == 1) { prev_offset = curr_offset - 1; @@ -191,11 +200,13 @@ void fill_segment_avx2(const argon2_instance_t *instance, } } } + return ARGON2_OK; } #else -void fill_segment_avx2(const argon2_instance_t* instance, +int fill_segment_avx2(const argon2_instance_t* instance, argon2_position_t position) { (void)instance; (void)position; + return ARGON2_INCORRECT_PARAMETER; /* AVX2 not available */ } #endif diff --git a/src/Crypto/Argon2/src/opt_sse2.c b/src/Crypto/Argon2/src/opt_sse2.c index 60393819..8b421351 100644 --- a/src/Crypto/Argon2/src/opt_sse2.c +++ b/src/Crypto/Argon2/src/opt_sse2.c @@ -19,12 +19,14 @@ #include "argon2.h" #include "core.h" +#include "Crypto/config.h" +#include "Crypto/cpu.h" +#include "Crypto/misc.h" #if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE #include "blake2/blake2.h" #include "blake2/blamka-round-opt.h" -#include "Crypto/config.h" /* * Function fills a new memory block and optionally XORs the old block over the new one. @@ -91,7 +93,7 @@ static void next_addresses(block *address_block, block *input_block) { fill_block(zero2_block, address_block, address_block, 0); } -void fill_segment_sse2(const argon2_instance_t *instance, +int fill_segment_sse2(const argon2_instance_t *instance, argon2_position_t position) { block *ref_block = NULL, *curr_block = NULL; block address_block, input_block; @@ -102,7 +104,7 @@ void fill_segment_sse2(const argon2_instance_t *instance, int data_independent_addressing; if (instance == NULL) { - return; + return ARGON2_INCORRECT_PARAMETER; } data_independent_addressing = @@ -148,6 +150,13 @@ void fill_segment_sse2(const argon2_instance_t *instance, for (i = starting_index; i < instance->segment_length; ++i, ++curr_offset, ++prev_offset) { + + // Check every 64 blocks. This is a good balance for responsiveness. + if ((i & 63) == 0 && instance->context_ptr->pAbortKeyDerivation && + *instance->context_ptr->pAbortKeyDerivation) + { + return ARGON2_OPERATION_CANCELLED; // Return cancellation code + } /*1.1 Rotating prev_offset if needed */ if (curr_offset % instance->lane_length == 1) { prev_offset = curr_offset - 1; @@ -194,11 +203,13 @@ void fill_segment_sse2(const argon2_instance_t *instance, } } } + return ARGON2_OK; } #else -void fill_segment_sse2(const argon2_instance_t* instance, +int fill_segment_sse2(const argon2_instance_t* instance, argon2_position_t position) { (void)instance; (void)position; + return ARGON2_INCORRECT_PARAMETER; // SSE2 not available } #endif diff --git a/src/Crypto/Argon2/src/ref.c b/src/Crypto/Argon2/src/ref.c index bd832143..097dcd0c 100644 --- a/src/Crypto/Argon2/src/ref.c +++ b/src/Crypto/Argon2/src/ref.c @@ -89,7 +89,7 @@ static void next_addresses(block *address_block, block *input_block, fill_block(zero_block, address_block, address_block, 0); } -void fill_segment_ref(const argon2_instance_t *instance, +int fill_segment_ref(const argon2_instance_t *instance, argon2_position_t position) { block *ref_block = NULL, *curr_block = NULL; block address_block, input_block, zero_block; @@ -100,7 +100,7 @@ void fill_segment_ref(const argon2_instance_t *instance, int data_independent_addressing; if (instance == NULL) { - return; + return ARGON2_INCORRECT_PARAMETER; } data_independent_addressing = @@ -145,6 +145,11 @@ void fill_segment_ref(const argon2_instance_t *instance, for (i = starting_index; i < instance->segment_length; ++i, ++curr_offset, ++prev_offset) { + if ((i & 63) == 0 && instance->context_ptr->pAbortKeyDerivation && + *instance->context_ptr->pAbortKeyDerivation) + { + return ARGON2_OPERATION_CANCELLED; // Return cancellation code + } /*1.1 Rotating prev_offset if needed */ if (curr_offset % instance->lane_length == 1) { prev_offset = curr_offset - 1; @@ -193,29 +198,30 @@ void fill_segment_ref(const argon2_instance_t *instance, } } } + return ARGON2_OK; } #if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 -extern void fill_segment_sse2(const argon2_instance_t* instance, +extern int fill_segment_sse2(const argon2_instance_t* instance, argon2_position_t position); -extern void fill_segment_avx2(const argon2_instance_t* instance, +extern int fill_segment_avx2(const argon2_instance_t* instance, argon2_position_t position); #endif -void fill_segment(const argon2_instance_t* instance, +int fill_segment(const argon2_instance_t* instance, argon2_position_t position) { #if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 if (HasSAVX2()) { - fill_segment_avx2(instance, position); + return fill_segment_avx2(instance, position); } else if (HasSSE2()) { - fill_segment_sse2(instance, position); + return fill_segment_sse2(instance, position); } else #endif { - fill_segment_ref(instance, position); + return fill_segment_ref(instance, position); } } diff --git a/src/Crypto/Argon2/src/selftest.c b/src/Crypto/Argon2/src/selftest.c index ee30557a..d937fdd1 100644 --- a/src/Crypto/Argon2/src/selftest.c +++ b/src/Crypto/Argon2/src/selftest.c @@ -92,6 +92,7 @@ static int argon2_selftest_ctx (void) context.free_cbk = NULL; context.flags = ARGON2_DEFAULT_FLAGS; context.version = ARGON2_VERSION_13; + context.pAbortKeyDerivation = NULL; /* No abort function */ /* Test execution for Argon2d, Argon2i, Argon2id */ @@ -118,7 +119,7 @@ static int hashtest (uint32_t t, uint32_t m, uint32_t p, const char *pwd, uint8_t out[32]; return (argon2_hash(t, 1 << m, p, pwd, strlen (pwd), salt, strlen (salt), - out, 32, type, ARGON2_VERSION_NUMBER) == ARGON2_OK && + out, 32, type, ARGON2_VERSION_NUMBER, NULL) == ARGON2_OK && memcmp (out, ref, 32) == 0) ? 0 : 1; } diff --git a/src/Crypto/cpu.h b/src/Crypto/cpu.h index 7ee269fb..507ef184 100644 --- a/src/Crypto/cpu.h +++ b/src/Crypto/cpu.h @@ -143,6 +143,8 @@ extern __m128 _mm_shuffle_ps(__m128 _A, __m128 _B, unsigned int _Imm8); extern __m128i _mm_srli_si128(__m128i _A, int _Imm); extern __m128i _mm_slli_si128(__m128i _A, int _Imm); extern __m128i _mm_setzero_si128(); +extern __m128i _mm_mul_epu32(__m128i _A, __m128i _B); +extern __m128i _mm_slli_epi64(__m128i _A, int _Count); #define _mm_xor_si64 _m_pxor #define _mm_empty _m_empty #define _MM_SHUFFLE(fp3,fp2,fp1,fp0) (((fp3) << 6) | ((fp2) << 4) | \ diff --git a/src/Volume/Pkcs5Kdf.cpp b/src/Volume/Pkcs5Kdf.cpp index 2ef60119..bb11d749 100644 --- a/src/Volume/Pkcs5Kdf.cpp +++ b/src/Volume/Pkcs5Kdf.cpp @@ -74,51 +74,51 @@ namespace VeraCrypt void Pkcs5HmacBlake2s_Boot::DeriveKey (const BufferPtr &key, const VolumePassword &password, const ConstBufferPtr &salt, int iterationCount) const { ValidateParameters (key, password, salt, iterationCount); - derive_key_blake2s (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size()); + derive_key_blake2s (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size(), NULL); } void Pkcs5HmacBlake2s::DeriveKey (const BufferPtr &key, const VolumePassword &password, const ConstBufferPtr &salt, int iterationCount) const { ValidateParameters (key, password, salt, iterationCount); - derive_key_blake2s (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size()); + derive_key_blake2s (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size(), NULL); } #endif void Pkcs5HmacSha256_Boot::DeriveKey (const BufferPtr &key, const VolumePassword &password, const ConstBufferPtr &salt, int iterationCount) const { ValidateParameters (key, password, salt, iterationCount); - derive_key_sha256 (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size()); + derive_key_sha256 (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size(), NULL); } void Pkcs5HmacSha256::DeriveKey (const BufferPtr &key, const VolumePassword &password, const ConstBufferPtr &salt, int iterationCount) const { ValidateParameters (key, password, salt, iterationCount); - derive_key_sha256 (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size()); + derive_key_sha256 (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size(), NULL); } void Pkcs5HmacSha512::DeriveKey (const BufferPtr &key, const VolumePassword &password, const ConstBufferPtr &salt, int iterationCount) const { ValidateParameters (key, password, salt, iterationCount); - derive_key_sha512 (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size()); + derive_key_sha512 (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size(), NULL); } #ifndef WOLFCRYPT_BACKEND void Pkcs5HmacWhirlpool::DeriveKey (const BufferPtr &key, const VolumePassword &password, const ConstBufferPtr &salt, int iterationCount) const { ValidateParameters (key, password, salt, iterationCount); - derive_key_whirlpool (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size()); + derive_key_whirlpool (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size(), NULL); } void Pkcs5HmacStreebog::DeriveKey (const BufferPtr &key, const VolumePassword &password, const ConstBufferPtr &salt, int iterationCount) const { ValidateParameters (key, password, salt, iterationCount); - derive_key_streebog (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size()); + derive_key_streebog (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size(), NULL); } void Pkcs5HmacStreebog_Boot::DeriveKey (const BufferPtr &key, const VolumePassword &password, const ConstBufferPtr &salt, int iterationCount) const { ValidateParameters (key, password, salt, iterationCount); - derive_key_streebog (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size()); + derive_key_streebog (password.DataPtr(), (int) password.Size(), salt.Get(), (int) salt.Size(), iterationCount, key.Get(), (int) key.Size(), NULL); } #endif }