weekend warriors!
the crowdstrike incident: let's do a deep dive into the kernel
-> bug in kernel-level driver led to memory corruption
kernel-level operations
a simplified example of how a kernel driver might interact with system calls:
#include <ntddk.h>
// Driver entry point: registers the dispatch routines for create, close and
// device-control requests on the driver object and reports success.
//
// DriverObject  - driver object whose MajorFunction table is filled in.
// RegistryPath  - not used by this routine.
//
// Returns STATUS_SUCCESS unconditionally.
//
// NOTE(review): CreateDispatch, CloseDispatch and DeviceControlDispatch are
// assumed to be declared before this point - confirm against the full file.
NTSTATUS DriverEntry(PDRIVER_OBJECT DriverObject, PUNICODE_STRING RegistryPath) {
    // Mark the parameter as intentionally unused so warning-as-error driver
    // builds do not fail on an unreferenced formal parameter.
    UNREFERENCED_PARAMETER(RegistryPath);

    // initialization
    DriverObject->MajorFunction[IRP_MJ_CREATE] = CreateDispatch;
    DriverObject->MajorFunction[IRP_MJ_CLOSE] = CloseDispatch;
    DriverObject->MajorFunction[IRP_MJ_DEVICE_CONTROL] = DeviceControlDispatch;

    return STATUS_SUCCESS;
}
// Dispatch routine for IRP_MJ_DEVICE_CONTROL requests.
//
// Reads the I/O control code from the current IRP stack location, handles the
// codes this driver knows about, and completes the IRP.
//
// DeviceObject  - target device object; not used by this routine.
// Irp           - the request to process; it is completed before returning
//                 and must not be touched afterwards.
//
// Returns the same status the IRP was completed with: STATUS_SUCCESS for
// IOCTL_CUSTOM_OPERATION, STATUS_INVALID_DEVICE_REQUEST for any other code.
NTSTATUS DeviceControlDispatch(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
    PIO_STACK_LOCATION irpSp = IoGetCurrentIrpStackLocation(Irp);
    ULONG ioControlCode = irpSp->Parameters.DeviceIoControl.IoControlCode;
    NTSTATUS status;

    UNREFERENCED_PARAMETER(DeviceObject);

    switch (ioControlCode) {
    case IOCTL_CUSTOM_OPERATION:
        // perform custom operation
        status = STATUS_SUCCESS;
        break;
    default:
        // Unknown control code: fail the request instead of reporting
        // success for an operation that was never performed.
        status = STATUS_INVALID_DEVICE_REQUEST;
        break;
    }

    // complete the IRP; no output bytes are produced on any path here
    Irp->IoStatus.Status = status;
    Irp->IoStatus.Information = 0;
    IoCompleteRequest(Irp, IO_NO_INCREMENT);

    // Return the local copy: the IRP may already be freed at this point.
    return status;
}
in this example, a bug in the DeviceControlDispatch
function could lead to system-wide issues, as it’s operating in kernel space.
alternative approaches
creating a custom secure kernel
here’s a minimal example of what the entry point of a custom kernel might look like:
// Entry point of the custom kernel. Runs the hardware setup routines, then
// the kernel subsystem setup routines, launches the first user-space process
// and finally parks the CPU in a halt loop. Never returns.
void kernel_main() {
    // --- hardware bring-up (order matters: GDT, then IDT, then paging) ---
    init_gdt();
    init_idt();
    init_paging();

    // --- kernel subsystems ---
    init_memory_manager();
    init_process_manager();
    init_file_system();

    // --- hand over to user space ---
    start_init_process();

    // --- idle: halt until something wakes the CPU, then halt again ---
    for (;;) {
        asm("hlt");
    }
}
this requires expertise and ongoing maintenance.
operating without a kernel
conceptual example of a “kernelless” system in assembly:
; Conceptual "kernelless" entry point: sets up a stack, runs the hardware
; init stubs, calls the application, then parks the CPU for good.
section .text
global _start
_start:
    ; Direct hardware initialization
    ; Set up stack (stack_top is the high end of the 16 KB area in .bss)
    mov esp, stack_top
    ; Initialize essential hardware
    call init_video
    call init_keyboard
    ; Jump to main application code
    call main_app
    ; Halt the CPU permanently. A lone `hlt` resumes at the next instruction
    ; when an interrupt arrives, which would fall through into init_video and
    ; execute a `ret` with nothing on the stack. Mask interrupts and loop so
    ; any wake-up simply halts again.
    cli
.hang:
    hlt
    jmp .hang
init_video:
    ; Direct manipulation of video hardware
    ret
init_keyboard:
    ; Direct manipulation of keyboard controller
    ret
main_app:
    ; Your main application logic
    ret
section .bss
stack_bottom:
    resb 16384 ; 16 KB for stack
stack_top:
this allows for maximum control but sacrifices the abstractions and hardware management that a kernel provides.
improving existing kernel-level drivers
-rigorous error checking:
// Validates a caller-supplied buffer before operating on it.
//
// buffer      - start of the buffer; must not be NULL.
// bufferSize  - size of the buffer; must not be zero.
//
// Returns STATUS_INVALID_PARAMETER for a NULL buffer or zero size,
// STATUS_ACCESS_VIOLATION when IsBufferAccessible rejects the buffer,
// and STATUS_SUCCESS otherwise.
NTSTATUS SafeOperation(PVOID buffer, SIZE_T bufferSize) {
    // Guard: a buffer pointer is required.
    if (buffer == NULL) {
        return STATUS_INVALID_PARAMETER;
    }

    // Guard: an empty buffer is treated as a bad argument as well.
    if (bufferSize == 0) {
        return STATUS_INVALID_PARAMETER;
    }

    // validate user-mode buffer
    // NOTE(review): the meaning of the TRUE flag depends on IsBufferAccessible,
    // which is defined elsewhere - confirm.
    if (IsBufferAccessible(buffer, bufferSize, TRUE)) {
        // perform operation
        return STATUS_SUCCESS;
    }

    return STATUS_ACCESS_VIOLATION;
}
-using memory management functions:
// Single pool tag shared by SafeAllocateMemory and SafeFreeMemory so the
// allocation and the free can never disagree. Freeing with a different tag
// than the allocation used can trigger a BAD_POOL_CALLER bug check. Written
// in reverse so pool tools display it as "Tag1".
#ifndef SAFE_MEMORY_POOL_TAG
#define SAFE_MEMORY_POOL_TAG '1gaT'
#endif

// Allocates `size` bytes of zero-initialized non-paged pool.
//
// size - number of bytes requested; zero is rejected.
//
// Returns a pointer to the zeroed block, or NULL when size is zero or the
// allocation fails. Release the block with SafeFreeMemory.
PVOID SafeAllocateMemory(SIZE_T size) {
    PVOID buffer;

    // A zero-byte pool request is almost always a caller bug; refuse it
    // rather than handing back a block that cannot hold anything.
    if (size == 0) {
        return NULL;
    }

    buffer = ExAllocatePoolWithTag(NonPagedPool, size, SAFE_MEMORY_POOL_TAG);
    if (buffer != NULL) {
        RtlZeroMemory(buffer, size);
    }
    return buffer;
}

// Releases a block obtained from SafeAllocateMemory.
//
// buffer - block to free; NULL is accepted and ignored.
VOID SafeFreeMemory(PVOID buffer) {
    if (buffer != NULL) {
        ExFreePoolWithTag(buffer, SAFE_MEMORY_POOL_TAG);
    }
}
today’s mood board:
- caffeinated
- debugged
- ready to code
- ready to sleep
til (today i learned)
- perceptrons -> they’re cool
- cuda world (below)
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
// Single-precision a*x + y: for every index i in [0, n), y[i] = a*x[i] + y[i].
//
// n  - number of elements to process; values <= 0 make the kernel a no-op.
// a  - scalar multiplier.
// x  - device pointer to the n input values (read only by this kernel).
// y  - device pointer to the n values updated in place.
//
// Launch layout: 1D grid of 1D blocks. Each thread starts at its global index
// and advances by the total number of launched threads, so the result is
// correct for any grid/block size, including grids smaller than n.
__global__
void saxpy(int n, float a, float *x, float *y)
{
    // 64-bit index math: blockIdx.x * blockDim.x and the running index can
    // exceed the int range for large n even though n itself fits in an int.
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x;
         i < n;
         i += stride) {
        y[i] = a * x[i] + y[i];
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `err` is an error, false when it is cudaSuccess.
static bool cudaFailed(cudaError_t err, const char *what)
{
    if (err == cudaSuccess) {
        return false;
    }
    fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(err));
    return true;
}

// Runs saxpy (y = 2*x + y) on 2^20 elements with x = 1 and y = 2, copies the
// result back and prints the largest deviation from the expected value 4.
//
// Returns EXIT_SUCCESS when every allocation, copy and the kernel launch
// succeeded, EXIT_FAILURE otherwise. All buffers are released on every path.
int main(void)
{
    // All locals are declared up front so the `goto cleanup` jumps below never
    // skip an initialization.
    const int N = 1<<20;
    const size_t bytes = (size_t)N * sizeof(float);
    const int threadsPerBlock = 256;
    const int numBlocks = (N + threadsPerBlock - 1) / threadsPerBlock; // ceil-div
    float *x = NULL, *y = NULL, *d_x = NULL, *d_y = NULL;
    float maxError = 0.0f;
    int exitCode = EXIT_FAILURE;

    x = (float*)malloc(bytes);
    y = (float*)malloc(bytes);
    if (x == NULL || y == NULL) {
        fprintf(stderr, "Host allocation of %zu bytes failed\n", bytes);
        goto cleanup;
    }

    if (cudaFailed(cudaMalloc(&d_x, bytes), "cudaMalloc(d_x)")) goto cleanup;
    if (cudaFailed(cudaMalloc(&d_y, bytes), "cudaMalloc(d_y)")) goto cleanup;

    for (int i = 0; i < N; i++) {
        x[i] = 1.0f;
        y[i] = 2.0f;
    }

    if (cudaFailed(cudaMemcpy(d_x, x, bytes, cudaMemcpyHostToDevice),
                   "copy x to device")) goto cleanup;
    if (cudaFailed(cudaMemcpy(d_y, y, bytes, cudaMemcpyHostToDevice),
                   "copy y to device")) goto cleanup;

    // Perform saxpy on 1M elements
    saxpy<<<numBlocks, threadsPerBlock>>>(N, 2.0f, d_x, d_y);
    // A launch does not return a status; a bad configuration shows up here.
    if (cudaFailed(cudaGetLastError(), "saxpy launch")) goto cleanup;

    // The blocking copy waits for the kernel, so errors raised while the
    // kernel was executing are reported by this call.
    if (cudaFailed(cudaMemcpy(y, d_y, bytes, cudaMemcpyDeviceToHost),
                   "copy y to host")) goto cleanup;

    for (int i = 0; i < N; i++) {
        maxError = fmaxf(maxError, fabsf(y[i] - 4.0f));
    }
    printf("Max error: %f\n", maxError);
    exitCode = EXIT_SUCCESS;

cleanup:
    // cudaFree and free both accept NULL, so partial setups are handled too.
    cudaFree(d_x);
    cudaFree(d_y);
    free(x);
    free(y);
    return exitCode;
}
random thought
don’t deploy on friday!