
Windows driver: overhaul IRP completion path in EncryptedIoQueue, add dual completion threads, precise byte accounting & safer UpdateBuffer

Major changes:
- Added a pooled + elastic work item model with retry/backoff (MAX_WI_RETRIES); removed semaphore usage.
- Introduced two completion threads (replacing the single CompletionThread) to reduce contention and latency under heavy IO.
- Added BytesCompleted (per IRP) and ActualBytes (per fragment) for correct short read/write accounting; total read/write stats now reflect real transferred bytes instead of the requested length.
- Moved decryption of read fragments into the IO thread; completion threads now only finalize IRPs (reduces the race window and simplifies flow).
- Deferred final IRP completion via FinalizeOriginalIrp to avoid inline IoCompleteRequest re-entrancy; added a safe OOM inline fallback.
- Implemented work item pool drain & orderly shutdown (ActiveWorkItems + NoActiveWorkItemsEvent) with a robust stop protocol.
- Replaced semaphore-based work item acquisition with a spin lock + free list + event (WorkItemAvailableEvent); added exponential backoff for transient exhaustion (see the sketch after this list).
- Added elastic (on-demand) work item allocation with pool vs dynamic origin tracking (FromPool).
- Added FreeCompletionWorkItemPool() for symmetric cleanup; ensured all threads are explicitly awakened during stop.
- Hardened UpdateBuffer: fixed parameter name typo, added bounds/overflow checks using IntSafe (ULongLongAdd), validated Count, guarded sector end computation.
- Fixed GPT/system region write protection logic to pass correct length instead of end offset.
- Ensured ASSERTs use fragment-relative bounds (cast + length) and avoided mixed 64-/32-bit comparisons.
- Added the MAX_WI_RETRIES constant and a WiRetryCount field in EncryptedIoRequest.
- Ensured the RemoveLock is released only after all queue/accounting updates (OnItemCompleted).
- Preserved reset/read-ahead logic; the read-ahead trigger is now based on actual completion and a zero pending-fragment count.
- General refactoring; clearer separation of concerns (TryAcquireCompletionWorkItem / FinalizeOriginalIrp / HandleCompleteOriginalIrp).
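For context, a minimal sketch of the spin lock + free list + event pattern that replaces the semaphore (helper names here are hypothetical; the field names match the EncryptedIoQueue struct in the diff below):

static PCOMPLETE_IRP_WORK_ITEM AcquirePooledWorkItem (EncryptedIoQueue *queue)
{
	PCOMPLETE_IRP_WORK_ITEM wi = NULL;
	KIRQL irql;

	// Pop a work item from the free list under the spin lock, if one is available.
	KeAcquireSpinLock (&queue->WorkItemLock, &irql);
	if (!IsListEmpty (&queue->FreeWorkItemsList))
		wi = CONTAINING_RECORD (RemoveHeadList (&queue->FreeWorkItemsList), COMPLETE_IRP_WORK_ITEM, ListEntry);
	KeReleaseSpinLock (&queue->WorkItemLock, irql);
	return wi;	// NULL: caller allocates elastically or backs off and retries
}

static void ReturnPooledWorkItem (EncryptedIoQueue *queue, PCOMPLETE_IRP_WORK_ITEM wi)
{
	KIRQL irql;

	KeAcquireSpinLock (&queue->WorkItemLock, &irql);
	InsertTailList (&queue->FreeWorkItemsList, &wi->ListEntry);
	KeReleaseSpinLock (&queue->WorkItemLock, irql);

	// Wake one waiter parked in the backoff path (WorkItemAvailableEvent is a
	// SynchronizationEvent, so one waiter is released per KeSetEvent call).
	KeSetEvent (&queue->WorkItemAvailableEvent, IO_DISK_INCREMENT, FALSE);
}

Unlike the old KeReleaseSemaphore/KeWaitForSingleObject pair, an empty free list here is a normal outcome rather than a blocking wait, which is what makes the elastic fallback and bounded backoff possible.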

Safety / correctness improvements:
- Accurate short read handling (STATUS_END_OF_FILE with the true byte count).
- Eliminated the risk of double free or premature RemoveLock release on completion paths.
- Prevented potential overflow in sector end arithmetic (see the sketch after this list).
- Reduced contention and the potential deadlock scenarios present in the previous semaphore wait path.
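The overflow guard reduces to a saturating "end of range" computation. A condensed sketch, assuming intsafe.h as used in the UpdateBuffer hunk below (ULongLongAdd returns INTSAFE_E_ARITHMETIC_OVERFLOW instead of silently wrapping):

#include <intsafe.h>

// end = start + length - 1, saturated on overflow.
// sectorLength has already been validated as non-zero, so the -1 cannot underflow.
ULONGLONG secEnd;
if (ULongLongAdd (sectorStart, sectorLength - 1, &secEnd) != S_OK)
	secEnd = (ULONGLONG) -1;	// saturate to MAXULONGLONG; the intersection test stays safe

Without the guard, a corrupt descriptor with sectorStart near 2^64 would wrap the end value below the start and defeat the range check.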
Author: Mounir IDRASSI
Date: 2025-09-07 23:58:35 +09:00
Parent: 55adda7504
Commit: 062b385a69
2 changed files with 350 additions and 128 deletions

src/Driver/EncryptedIoQueue.c

@@ -19,6 +19,7 @@
#include "Volumes.h"
+#include <IntSafe.h>
+#define MAX_WI_RETRIES 8
static void AcquireBufferPoolMutex (EncryptedIoQueue *queue)
{
@@ -160,19 +161,23 @@ static void DecrementOutstandingIoCount (EncryptedIoQueue *queue)
static void OnItemCompleted (EncryptedIoQueueItem *item, BOOL freeItem)
{
-DecrementOutstandingIoCount (item->Queue);
-IoReleaseRemoveLock (&item->Queue->RemoveLock, item->OriginalIrp);
+EncryptedIoQueue* queue = item->Queue;
+PIRP originalIrp = item->OriginalIrp;
if (NT_SUCCESS (item->Status))
{
if (item->Write)
-item->Queue->TotalBytesWritten += item->OriginalLength;
+queue->TotalBytesWritten += item->BytesCompleted;
else
-item->Queue->TotalBytesRead += item->OriginalLength;
+queue->TotalBytesRead += item->BytesCompleted;
}
+DecrementOutstandingIoCount (queue);
if (freeItem)
-ReleasePoolBuffer (item->Queue, item);
+ReleasePoolBuffer (queue, item);
+// Release the RemoveLock last after we are done touching the queue
+IoReleaseRemoveLock (&queue->RemoveLock, originalIrp);
}
@@ -232,7 +237,7 @@ UpdateBuffer(
SIZE_T secRegionSize,
uint64 bufferDiskOffset,
uint32 bufferLength,
-BOOL doUpadte
+BOOL doUpdate
)
{
uint64 intersectStart;
@@ -262,17 +267,25 @@ UpdateBuffer(
uint64 sectorOffset = DeList->DE[i].Sectors.Offset;
// Check that sectorOffset and sectorLength are valid within secRegion
-if (sectorOffset > secRegionSize ||
+if (sectorOffset > (uint64)secRegionSize ||
sectorLength == 0 ||
-(sectorOffset + sectorLength) > secRegionSize)
+(sectorOffset + sectorLength) > (uint64)secRegionSize)
{
// Invalid entry - skip
continue;
}
+// Safely compute end = start + length - 1 (guard against overflow)
+ULONGLONG secEnd;
+HRESULT hr;
+// sectorLength != 0 already checked above
+hr = ULongLongAdd(sectorStart, sectorLength - 1, &secEnd);
+if (hr != S_OK)
+secEnd = (ULONGLONG)-1;
GetIntersection(
bufferDiskOffset, bufferLength,
-sectorStart, sectorStart + sectorLength - 1,
+sectorStart, secEnd,
&intersectStart, &intersectLength
);
@@ -289,7 +302,7 @@ UpdateBuffer(
continue; // Intersection out of secRegion range
updated = TRUE;
-if (doUpadte && buffer != NULL) {
+if (doUpdate && buffer != NULL) {
memcpy(
buffer + bufferPos,
secRegion + regionPos,
@@ -306,6 +319,7 @@ UpdateBuffer(
return updated;
}
+// Note: completes the IRP and releases the RemoveLock last (after all queue accesses)
static VOID CompleteIrpWorkItemRoutine(PDEVICE_OBJECT DeviceObject, PVOID Context)
{
PCOMPLETE_IRP_WORK_ITEM workItem = (PCOMPLETE_IRP_WORK_ITEM)Context;
@@ -316,69 +330,148 @@ static VOID CompleteIrpWorkItemRoutine(PDEVICE_OBJECT DeviceObject, PVOID Contex
__try
{
-// Complete the IRP
+// Complete IRP
TCCompleteDiskIrp(workItem->Irp, workItem->Status, workItem->Information);
-// Centralized accounting and cleanup
-item->Status = workItem->Status;
-OnItemCompleted(item, FALSE); // Do not free item here; it will be freed below
}
__finally
{
-// If no active work items remain, signal the event
-if (InterlockedDecrement(&queue->ActiveWorkItems) == 0)
-{
-KeSetEvent(&queue->NoActiveWorkItemsEvent, IO_DISK_INCREMENT, FALSE);
-}
+item->BytesCompleted = workItem->Information;
+OnItemCompleted(item, TRUE); // releases RemoveLock last
-// Return the work item to the free list
+// Return or free the work item depending on origin
+if (workItem->FromPool)
+{
KeAcquireSpinLock(&queue->WorkItemLock, &oldIrql);
InsertTailList(&queue->FreeWorkItemsList, &workItem->ListEntry);
KeReleaseSpinLock(&queue->WorkItemLock, oldIrql);
-// Release the semaphore to signal that a work item is available
-KeReleaseSemaphore(&queue->WorkItemSemaphore, IO_DISK_INCREMENT, 1, FALSE);
-// Free the item
-ReleasePoolBuffer(queue, item);
}
}
-// Handles the completion of the original IRP.
-static VOID HandleCompleteOriginalIrp(EncryptedIoQueue* queue, EncryptedIoRequest* request)
-{
-NTSTATUS status = KeWaitForSingleObject(&queue->WorkItemSemaphore, Executive, KernelMode, FALSE, NULL);
-if (queue->ThreadExitRequested)
-return;
-if (!NT_SUCCESS(status))
-{
-// Handle wait failure: we call the completion routine directly.
-// This is not ideal since it can cause deadlock that we are trying to fix but it is better than losing the IRP.
-CompleteOriginalIrp(request->Item, STATUS_INSUFFICIENT_RESOURCES, 0);
+// immediately wake any waiter
+KeSetEvent(&queue->WorkItemAvailableEvent, IO_DISK_INCREMENT, FALSE);
-}
-else
-{
-// Obtain a work item from the free list.
-KIRQL oldIrql;
-KeAcquireSpinLock(&queue->WorkItemLock, &oldIrql);
-PLIST_ENTRY freeEntry = RemoveHeadList(&queue->FreeWorkItemsList);
-KeReleaseSpinLock(&queue->WorkItemLock, oldIrql);
+IoFreeWorkItem(workItem->WorkItem);
+TCfree(workItem);
}
-PCOMPLETE_IRP_WORK_ITEM workItem = CONTAINING_RECORD(freeEntry, COMPLETE_IRP_WORK_ITEM, ListEntry);
+if (InterlockedDecrement(&queue->ActiveWorkItems) == 0)
+KeSetEvent(&queue->NoActiveWorkItemsEvent, IO_DISK_INCREMENT, FALSE);
}
-__finally
-{
-}
-}
-// Increment ActiveWorkItems.
// Helper: acquire a completion work item (from pool if available, else elastic allocation)
static PCOMPLETE_IRP_WORK_ITEM TryAcquireCompletionWorkItem(EncryptedIoQueue* queue)
{
PCOMPLETE_IRP_WORK_ITEM wi = NULL;
KIRQL irql;
// Try pooled work item
KeAcquireSpinLock(&queue->WorkItemLock, &irql);
if (!IsListEmpty(&queue->FreeWorkItemsList))
{
PLIST_ENTRY e = RemoveHeadList(&queue->FreeWorkItemsList);
wi = CONTAINING_RECORD(e, COMPLETE_IRP_WORK_ITEM, ListEntry);
wi->FromPool = TRUE;
}
KeReleaseSpinLock(&queue->WorkItemLock, irql);
// Elastic fallback: allocate on demand
if (!wi)
{
wi = (PCOMPLETE_IRP_WORK_ITEM)TCalloc(sizeof(COMPLETE_IRP_WORK_ITEM));
if (wi)
{
RtlZeroMemory(wi, sizeof(*wi));
wi->WorkItem = IoAllocateWorkItem(queue->DeviceObject);
if (!wi->WorkItem) { TCfree(wi); wi = NULL; }
else wi->FromPool = FALSE;
}
}
return wi;
}
// Complete the original IRP: try pooled work item, then elastic, else inline.
// Precondition: item->Status and item->BytesCompleted are set by the caller.
static VOID FinalizeOriginalIrp(EncryptedIoQueue* queue, EncryptedIoQueueItem* item)
{
// Try to get a work item (from the preallocated pool first, else elastic alloc)
PCOMPLETE_IRP_WORK_ITEM wi = TryAcquireCompletionWorkItem(queue);
if (wi)
{
InterlockedIncrement(&queue->ActiveWorkItems);
KeResetEvent(&queue->NoActiveWorkItemsEvent);
// Prepare the work item.
-workItem->Irp = request->Item->OriginalIrp;
-workItem->Status = request->Item->Status;
-workItem->Information = NT_SUCCESS(request->Item->Status) ? request->Item->OriginalLength : 0;
-workItem->Item = request->Item;
+wi->Irp = item->OriginalIrp;
+wi->Status = item->Status;
+wi->Information = item->BytesCompleted;
+wi->Item = item;
// Queue the work item.
-IoQueueWorkItem(workItem->WorkItem, CompleteIrpWorkItemRoutine, DelayedWorkQueue, workItem);
+IoQueueWorkItem(wi->WorkItem, CompleteIrpWorkItemRoutine, DelayedWorkQueue, wi);
return;
}
// Final fallback: inline completion (safe OOM path)
TCCompleteDiskIrp(item->OriginalIrp, item->Status, item->BytesCompleted);
OnItemCompleted(item, TRUE);
}
// Handles the completion of the original IRP.
// Returns TRUE if the caller still owns and should free request parameter.
// Returns FALSE if this function requeued request parameter (caller must NOT free it).
static BOOLEAN HandleCompleteOriginalIrp(EncryptedIoQueue* queue, EncryptedIoRequest* request)
{
PCOMPLETE_IRP_WORK_ITEM wi = TryAcquireCompletionWorkItem(queue);
if (!wi)
{
// No work item available. Either try again later, or give up and complete inline.
request->WiRetryCount++;
if (request->WiRetryCount >= MAX_WI_RETRIES)
{
// Final fallback: complete inline on the completion thread (no locks held)
TCCompleteDiskIrp(request->Item->OriginalIrp, request->Item->Status, request->Item->BytesCompleted);
// Complete accounting and release resources (RemoveLock released last inside OnItemCompleted)
OnItemCompleted(request->Item, TRUE);
// We completed: caller should free request
return TRUE;
}
// Requeue and wait for a pooled work item to become available
ExInterlockedInsertTailList(&queue->CompletionThreadQueue,
&request->CompletionListEntry,
&queue->CompletionThreadQueueLock);
KeSetEvent(&queue->CompletionThreadQueueNotEmptyEvent, IO_DISK_INCREMENT, FALSE);
// Prefer waiting on a real signal over blind sleeps
if (queue->OverflowBackoffMs == 0) queue->OverflowBackoffMs = 1;
LARGE_INTEGER duetime; duetime.QuadPart = -(LONGLONG)queue->OverflowBackoffMs * 10000;
// Wait briefly for a pooled work item to be returned
KeWaitForSingleObject(&queue->WorkItemAvailableEvent, Executive, KernelMode, FALSE, &duetime);
if (queue->OverflowBackoffMs < 32) queue->OverflowBackoffMs <<= 1;
return FALSE; // we requeued so caller must not free request
}
// Queue to system worker thread
InterlockedIncrement(&queue->ActiveWorkItems);
KeResetEvent(&queue->NoActiveWorkItemsEvent);
wi->Irp = request->Item->OriginalIrp;
wi->Status = request->Item->Status;
wi->Information = request->Item->BytesCompleted;
wi->Item = request->Item;
IoQueueWorkItem(wi->WorkItem, CompleteIrpWorkItemRoutine, DelayedWorkQueue, wi);
queue->OverflowBackoffMs = 0;
return TRUE; // caller should free request
}
static VOID CompletionThreadProc(PVOID threadArg)
@@ -386,48 +479,33 @@ static VOID CompletionThreadProc(PVOID threadArg)
EncryptedIoQueue* queue = (EncryptedIoQueue*)threadArg;
PLIST_ENTRY listEntry;
EncryptedIoRequest* request;
-UINT64_STRUCT dataUnit;
if (IsEncryptionThreadPoolRunning())
KeSetPriorityThread(KeGetCurrentThread(), LOW_REALTIME_PRIORITY);
-while (!queue->ThreadExitRequested)
+for (;;)
{
if (!NT_SUCCESS(KeWaitForSingleObject(&queue->CompletionThreadQueueNotEmptyEvent, Executive, KernelMode, FALSE, NULL)))
continue;
if (queue->ThreadExitRequested)
break;
while ((listEntry = ExInterlockedRemoveHeadList(&queue->CompletionThreadQueue, &queue->CompletionThreadQueueLock)))
{
request = CONTAINING_RECORD(listEntry, EncryptedIoRequest, CompletionListEntry);
-if (request->EncryptedLength > 0 && NT_SUCCESS(request->Item->Status))
-{
-ASSERT(request->EncryptedOffset + request->EncryptedLength <= request->Offset.QuadPart + request->Length);
-dataUnit.Value = (request->Offset.QuadPart + request->EncryptedOffset) / ENCRYPTION_DATA_UNIT_SIZE;
-if (queue->CryptoInfo->bPartitionInInactiveSysEncScope)
-dataUnit.Value += queue->CryptoInfo->FirstDataUnitNo.Value;
-else if (queue->RemapEncryptedArea)
-dataUnit.Value += queue->RemappedAreaDataUnitOffset;
-DecryptDataUnits(request->Data + request->EncryptedOffset, &dataUnit, request->EncryptedLength / ENCRYPTION_DATA_UNIT_SIZE, queue->CryptoInfo);
-}
-// Dump("Read sector %lld count %d\n", request->Offset.QuadPart >> 9, request->Length >> 9);
-// Update subst sectors
-if((queue->SecRegionData != NULL) && (queue->SecRegionSize > 512)) {
-UpdateBuffer(request->Data, queue->SecRegionData, queue->SecRegionSize, request->Offset.QuadPart, request->Length, TRUE);
-}
if (request->CompleteOriginalIrp)
{
-HandleCompleteOriginalIrp(queue, request);
+if (!HandleCompleteOriginalIrp(queue, request))
+{
+// request was requeued; do not free it now
+continue;
+}
}
ReleasePoolBuffer(queue, request);
}
+if (queue->ThreadExitRequested)
+break;
}
PsTerminateSystemThread(STATUS_SUCCESS);
@@ -494,6 +572,9 @@ static VOID IoThreadProc (PVOID threadArg)
#endif
// Perform IO request if no preceding request of the item failed
+ULONG_PTR bytesXfer = 0; // track per-fragment transfer
+request->ActualBytes = 0; // default
+request->WiRetryCount = 0;
if (NT_SUCCESS (request->Item->Status))
{
if (queue->ThreadBlockReadWrite)
@@ -561,6 +642,7 @@ static VOID IoThreadProc (PVOID threadArg)
else
request->Item->Status = TCCachedRead (queue, NULL, request->Data, request->Offset, request->Length);
}
+bytesXfer = NT_SUCCESS(request->Item->Status) ? request->Length : 0;
}
else
{
@@ -571,9 +653,21 @@ static VOID IoThreadProc (PVOID threadArg)
else
request->Item->Status = TCCachedRead (queue, &ioStatus, request->Data, request->Offset, request->Length);
+bytesXfer = (ULONG_PTR) ioStatus.Information;
+// If short read on success, mark STATUS_END_OF_FILE but keep bytesXfer (true partial count)
if (NT_SUCCESS (request->Item->Status) && ioStatus.Information != request->Length)
request->Item->Status = STATUS_END_OF_FILE;
}
+// For writes, we can account immediately.
+// For reads, defer final accounting until after clamp/copy below.
+if (request->Item->Write) {
+request->ActualBytes = (ULONG)bytesXfer;
+request->Item->BytesCompleted += bytesXfer;
+} else {
+// Provisional value; will be finalized after decrypt/copy clamp.
+request->ActualBytes = (ULONG)bytesXfer;
+}
}
if (request->Item->Write)
@@ -584,17 +678,64 @@ static VOID IoThreadProc (PVOID threadArg)
if (request->CompleteOriginalIrp)
{
-HandleCompleteOriginalIrp(queue, request);
+// call unified path to finish the original IRP
+FinalizeOriginalIrp(queue, request->Item);
}
+// request itself is no longer needed
ReleasePoolBuffer(queue, request);
}
else
{
BOOL readAhead = FALSE;
-if (NT_SUCCESS (request->Item->Status))
-memcpy (request->OrigDataBufferFragment, request->Data, request->Length);
// Determine how many bytes we actually read for this fragment
ULONG copyLen = request->ActualBytes;
// Decrypt in IO thread.
if (copyLen > 0 && request->EncryptedLength > 0)
{
UINT64_STRUCT dataUnit;
ULONG encStart = (ULONG)request->EncryptedOffset;
ULONG encInCopy = (copyLen > encStart)
? min((ULONG)copyLen - encStart, (ULONG)request->EncryptedLength)
: 0;
// Trim to full units (avoid returning ciphertext tails)
encInCopy -= (encInCopy % ENCRYPTION_DATA_UNIT_SIZE);
if (copyLen > encStart + encInCopy)
copyLen = encStart + encInCopy;
if (encInCopy > 0)
{
ASSERT((ULONG)request->EncryptedOffset + encInCopy <= copyLen);
dataUnit.Value = (request->Offset.QuadPart + request->EncryptedOffset) / ENCRYPTION_DATA_UNIT_SIZE;
if (queue->CryptoInfo->bPartitionInInactiveSysEncScope)
dataUnit.Value += queue->CryptoInfo->FirstDataUnitNo.Value;
else if (queue->RemapEncryptedArea)
dataUnit.Value += queue->RemappedAreaDataUnitOffset;
DecryptDataUnits(request->Data + request->EncryptedOffset,
&dataUnit,
encInCopy / ENCRYPTION_DATA_UNIT_SIZE,
queue->CryptoInfo);
}
}
// Finalize read accounting after clamp
bytesXfer = copyLen;
request->ActualBytes = (ULONG)bytesXfer;
request->Item->BytesCompleted += bytesXfer;
// Update subst sectors before copying
if (copyLen > 0 && (queue->SecRegionData != NULL) && (queue->SecRegionSize > 512))
{
UpdateBuffer(request->Data, queue->SecRegionData, queue->SecRegionSize,
request->Offset.QuadPart, copyLen, TRUE);
}
if (copyLen > 0)
memcpy(request->OrigDataBufferFragment, request->Data, copyLen);
ReleaseFragmentBuffer (queue, request->Data);
request->Data = request->OrigDataBufferFragment;
@@ -608,8 +749,16 @@ static VOID IoThreadProc (PVOID threadArg)
InterlockedIncrement (&queue->OutstandingIoCount);
}
if (request->CompleteOriginalIrp)
{
ExInterlockedInsertTailList (&queue->CompletionThreadQueue, &request->CompletionListEntry, &queue->CompletionThreadQueueLock);
KeSetEvent (&queue->CompletionThreadQueueNotEmptyEvent, IO_DISK_INCREMENT, FALSE);
}
+else
+{
+// Non-final fragment: no need to involve the completion thread
+ReleasePoolBuffer(queue, request);
+}
if (readAhead)
{
@@ -689,8 +838,8 @@ static VOID MainThreadProc (PVOID threadArg)
item->Queue = queue;
item->OriginalIrp = irp;
item->Status = STATUS_SUCCESS;
+item->BytesCompleted = 0;
IoSetCancelRoutine (irp, NULL);
if (irp->Cancel)
{
CompleteOriginalIrp (item, STATUS_CANCELLED, 0);
@@ -778,7 +927,10 @@ static VOID MainThreadProc (PVOID threadArg)
}
TCfree (buffer);
-CompleteOriginalIrp (item, item->Status, NT_SUCCESS (item->Status) ? item->OriginalLength : 0);
+// Defer completion to avoid inline IoCompleteRequest re-entrancy
+item->BytesCompleted = NT_SUCCESS(item->Status) ? item->OriginalLength : 0;
+FinalizeOriginalIrp(queue, item);
continue;
}
@@ -864,7 +1016,7 @@ static VOID MainThreadProc (PVOID threadArg)
}
else if (item->Write
&& (queue->SecRegionData != NULL) && (queue->SecRegionSize > 512)
-&& UpdateBuffer (NULL, queue->SecRegionData, queue->SecRegionSize, item->OriginalOffset.QuadPart, (uint32)(item->OriginalOffset.QuadPart + item->OriginalLength - 1), FALSE))
+&& UpdateBuffer (NULL, queue->SecRegionData, queue->SecRegionSize, item->OriginalOffset.QuadPart, item->OriginalLength, FALSE))
{
// Prevent inappropriately designed software from damaging important data
Dump ("Preventing write to the system GPT area\n");
@@ -940,7 +1092,7 @@ static VOID MainThreadProc (PVOID threadArg)
if (request->EncryptedLength > 0)
{
UINT64_STRUCT dataUnit;
-ASSERT (request->EncryptedOffset + request->EncryptedLength <= request->Offset.QuadPart + request->Length);
+ASSERT ((ULONG)request->EncryptedOffset + request->EncryptedLength <= request->Length);
dataUnit.Value = (request->Offset.QuadPart + request->EncryptedOffset) / ENCRYPTION_DATA_UNIT_SIZE;
@@ -1079,16 +1231,42 @@ NTSTATUS EncryptedIoQueueResumeFromHold (EncryptedIoQueue *queue)
return STATUS_SUCCESS;
}
static VOID FreeCompletionWorkItemPool(EncryptedIoQueue* queue)
{
if (!queue->WorkItemPool) return;
// Drain the list for hygiene
KIRQL irql;
KeAcquireSpinLock(&queue->WorkItemLock, &irql);
while (!IsListEmpty(&queue->FreeWorkItemsList))
(void) RemoveHeadList(&queue->FreeWorkItemsList);
KeReleaseSpinLock(&queue->WorkItemLock, irql);
for (ULONG i = 0; i < queue->MaxWorkItems; ++i)
{
if (queue->WorkItemPool[i].WorkItem)
{
IoFreeWorkItem(queue->WorkItemPool[i].WorkItem);
queue->WorkItemPool[i].WorkItem = NULL;
}
}
TCfree(queue->WorkItemPool);
queue->WorkItemPool = NULL;
}
NTSTATUS EncryptedIoQueueStart (EncryptedIoQueue *queue)
{
NTSTATUS status;
EncryptedIoQueueBuffer *buffer;
int i, j, preallocatedIoRequestCount, preallocatedItemCount, fragmentSize;
KIRQL oldIrql;
preallocatedIoRequestCount = EncryptionIoRequestCount;
preallocatedItemCount = EncryptionItemCount;
fragmentSize = EncryptionFragmentSize;
+// Clamp preallocation to a sane hard limit
+if (preallocatedIoRequestCount > TC_ENC_IO_QUEUE_PREALLOCATED_IO_REQUEST_MAX_COUNT)
+preallocatedIoRequestCount = TC_ENC_IO_QUEUE_PREALLOCATED_IO_REQUEST_MAX_COUNT;
queue->StartPending = TRUE;
queue->ThreadExitRequested = FALSE;
@@ -1190,34 +1368,45 @@ retry_preallocated:
// Initialize the free work item list
InitializeListHead(&queue->FreeWorkItemsList);
-KeInitializeSemaphore(&queue->WorkItemSemaphore, EncryptionMaxWorkItems, EncryptionMaxWorkItems);
KeInitializeSpinLock(&queue->WorkItemLock);
+KeInitializeEvent(&queue->WorkItemAvailableEvent, SynchronizationEvent, FALSE);
+queue->OverflowBackoffMs = 1;
queue->MaxWorkItems = EncryptionMaxWorkItems;
-queue->WorkItemPool = (PCOMPLETE_IRP_WORK_ITEM)TCalloc(sizeof(COMPLETE_IRP_WORK_ITEM) * queue->MaxWorkItems);
-if (!queue->WorkItemPool)
+// Enforce [1, VC_MAX_WORK_ITEMS]
+if (queue->MaxWorkItems == 0)
+queue->MaxWorkItems = 1;
+if (queue->MaxWorkItems > VC_MAX_WORK_ITEMS)
+queue->MaxWorkItems = VC_MAX_WORK_ITEMS;
-{
-goto noMemory;
-}
+// Two-phase allocation to avoid leaving stale LIST_ENTRYs on failure
+PCOMPLETE_IRP_WORK_ITEM pool;
// Allocate and initialize work items
+pool = (PCOMPLETE_IRP_WORK_ITEM)TCalloc(sizeof(COMPLETE_IRP_WORK_ITEM) * queue->MaxWorkItems);
+if (!pool)
+goto noMemory;
+// Allocate all work items first
for (i = 0; i < (int) queue->MaxWorkItems; ++i)
{
-queue->WorkItemPool[i].WorkItem = IoAllocateWorkItem(queue->DeviceObject);
-if (!queue->WorkItemPool[i].WorkItem)
+pool[i].WorkItem = IoAllocateWorkItem(queue->DeviceObject);
+if (!pool[i].WorkItem)
{
-// Handle allocation failure
+// Free previously allocated work items
for (j = 0; j < i; ++j)
{
-IoFreeWorkItem(queue->WorkItemPool[j].WorkItem);
}
-TCfree(queue->WorkItemPool);
+IoFreeWorkItem(pool[j].WorkItem);
+TCfree(pool);
goto noMemory;
}
}
-// Insert the work item into the free list
-ExInterlockedInsertTailList(&queue->FreeWorkItemsList, &queue->WorkItemPool[i].ListEntry, &queue->WorkItemLock);
+// Publish pool and insert entries into the free list under the spin lock
+queue->WorkItemPool = pool;
+KeAcquireSpinLock(&queue->WorkItemLock, &oldIrql);
+for (i = 0; i < (int) queue->MaxWorkItems; ++i)
+{
+InsertTailList(&queue->FreeWorkItemsList, &queue->WorkItemPool[i].ListEntry);
+}
+KeReleaseSpinLock(&queue->WorkItemLock, oldIrql);
}
queue->ActiveWorkItems = 0;
@@ -1250,7 +1439,7 @@ retry_preallocated:
KeInitializeSpinLock (&queue->CompletionThreadQueueLock);
KeInitializeEvent (&queue->CompletionThreadQueueNotEmptyEvent, SynchronizationEvent, FALSE);
-status = TCStartThread (CompletionThreadProc, queue, &queue->CompletionThread);
+status = TCStartThread (CompletionThreadProc, queue, &queue->CompletionThreads[0]);
if (!NT_SUCCESS (status))
{
queue->ThreadExitRequested = TRUE;
@@ -1259,6 +1448,16 @@ retry_preallocated:
goto err;
}
+status = TCStartThread (CompletionThreadProc, queue, &queue->CompletionThreads[1]);
+if (!NT_SUCCESS (status))
+{
+queue->ThreadExitRequested = TRUE;
+TCStopThread (queue->MainThread, &queue->MainThreadQueueNotEmptyEvent);
+TCStopThread (queue->IoThread, &queue->IoThreadQueueNotEmptyEvent);
+TCStopThread (queue->CompletionThreads[0], &queue->CompletionThreadQueueNotEmptyEvent);
+goto err;
+}
#ifdef TC_TRACE_IO_QUEUE
GetElapsedTimeInit (&queue->LastPerformanceCounter);
#endif
@@ -1282,6 +1481,9 @@ err:
FreePoolBuffers (queue);
+if (queue->WorkItemPool)
+FreeCompletionWorkItemPool(queue);
queue->StartPending = FALSE;
return status;
}
@@ -1300,31 +1502,42 @@ NTSTATUS EncryptedIoQueueStop (EncryptedIoQueue *queue)
Dump ("Queue stopping out=%d\n", queue->OutstandingIoCount);
queue->ThreadExitRequested = TRUE;
+// Wake all threads
+KeSetEvent(&queue->MainThreadQueueNotEmptyEvent, IO_DISK_INCREMENT, FALSE);
+KeSetEvent(&queue->IoThreadQueueNotEmptyEvent, IO_DISK_INCREMENT, FALSE);
+KeSetEvent(&queue->CompletionThreadQueueNotEmptyEvent, IO_DISK_INCREMENT, FALSE);
+// Wake any GetPoolBuffer waiters (defensive)
+KeSetEvent(&queue->PoolBufferFreeEvent, IO_DISK_INCREMENT, FALSE);
TCStopThread (queue->MainThread, &queue->MainThreadQueueNotEmptyEvent);
TCStopThread (queue->IoThread, &queue->IoThreadQueueNotEmptyEvent);
-TCStopThread (queue->CompletionThread, &queue->CompletionThreadQueueNotEmptyEvent);
+TCStopThread (queue->CompletionThreads[0], &queue->CompletionThreadQueueNotEmptyEvent);
+TCStopThread (queue->CompletionThreads[1], &queue->CompletionThreadQueueNotEmptyEvent);
-// Wait for active work items to complete
-KeResetEvent(&queue->NoActiveWorkItemsEvent);
-Dump("Queue stopping active work items=%d\n", queue->ActiveWorkItems);
-while (InterlockedCompareExchange(&queue->ActiveWorkItems, 0, 0) > 0)
/*
* Wait for all completion work items to finish.
*
* Work-item drain protocol:
* - queue->ActiveWorkItems is incremented by producers when queuing a completion work item,
* and queue->NoActiveWorkItemsEvent (NotificationEvent) is reset at that time
* (see FinalizeOriginalIrp and HandleCompleteOriginalIrp).
* - CompleteIrpWorkItemRoutine decrements ActiveWorkItems and sets
* NoActiveWorkItemsEvent when the count transitions 1 -> 0.
* - Waiters must not reset the notification event. They simply loop until the count is 0,
* waiting for the final 1 -> 0 transition to signal the event.
*/
Dump("Queue stopping, waiting for active work items (n=%ld)\n",
InterlockedExchangeAdd(&queue->ActiveWorkItems, 0)); // atomic read
for (;;)
{
LONG n = InterlockedExchangeAdd(&queue->ActiveWorkItems, 0); // atomic read
if (n == 0)
break;
KeWaitForSingleObject(&queue->NoActiveWorkItemsEvent, Executive, KernelMode, FALSE, NULL);
-// reset the event again in case multiple work items are completing
-KeResetEvent(&queue->NoActiveWorkItemsEvent);
}
-// Free pre-allocated work items
-for (ULONG i = 0; i < queue->MaxWorkItems; ++i)
-{
-if (queue->WorkItemPool[i].WorkItem)
-{
-IoFreeWorkItem(queue->WorkItemPool[i].WorkItem);
-queue->WorkItemPool[i].WorkItem = NULL;
-}
-}
-TCfree(queue->WorkItemPool);
+FreeCompletionWorkItemPool(queue);
TCfree (queue->FragmentBufferA);
TCfree (queue->FragmentBufferB);

src/Driver/EncryptedIoQueue.h

@@ -46,6 +46,7 @@ typedef struct _COMPLETE_IRP_WORK_ITEM
ULONG_PTR Information;
void* Item;
LIST_ENTRY ListEntry; // For managing free work items
+BOOLEAN FromPool; // TRUE if taken from prealloc pool
} COMPLETE_IRP_WORK_ITEM, * PCOMPLETE_IRP_WORK_ITEM;
typedef struct
@@ -86,8 +87,8 @@ typedef struct
KSPIN_LOCK IoThreadQueueLock;
KEVENT IoThreadQueueNotEmptyEvent;
-// Completion thread
-PKTHREAD CompletionThread;
+// Completion threads
+PKTHREAD CompletionThreads[2]; // two threads to handle completions
LIST_ENTRY CompletionThreadQueue;
KSPIN_LOCK CompletionThreadQueueLock;
KEVENT CompletionThreadQueueNotEmptyEvent;
@@ -140,11 +141,16 @@ typedef struct
PCOMPLETE_IRP_WORK_ITEM WorkItemPool;
ULONG MaxWorkItems;
LIST_ENTRY FreeWorkItemsList;
-KSEMAPHORE WorkItemSemaphore;
KSPIN_LOCK WorkItemLock;
+// Backoff (ms) when overflow work-item alloc fails. grows up to 32ms, reset on success
+ULONG OverflowBackoffMs;
volatile LONG ActiveWorkItems;
KEVENT NoActiveWorkItemsEvent;
+// signaled whenever a pooled work item returns to the free list
+KEVENT WorkItemAvailableEvent;
} EncryptedIoQueue;
@@ -156,6 +162,7 @@ typedef struct
ULONG OriginalLength;
LARGE_INTEGER OriginalOffset;
NTSTATUS Status;
+ULONG_PTR BytesCompleted; // actual bytes transferred across all fragments
#ifdef TC_TRACE_IO_QUEUE
LARGE_INTEGER OriginalIrpOffset;
@@ -175,6 +182,8 @@ typedef struct
ULONG EncryptedLength;
uint8 *Data;
uint8 *OrigDataBufferFragment;
+ULONG ActualBytes; // actual bytes transferred for this fragment (0 on failure)
+UCHAR WiRetryCount; // count how many times we failed to get a work item
LIST_ENTRY ListEntry;
LIST_ENTRY CompletionListEntry;