Newer
Older
AMI-Aptio-BIOS-Reversed / UefiCpuPkg / CpuArchDxe / CpuArchDxe.c
@Ajax Dong Ajax Dong 2 days ago 64 KB Full restructure
/** @file
  CpuArchDxe.c -- CPU Architecture Protocol DXE Driver

  This UEFI DXE driver installs the EFI CPU Architecture Protocol on a Purley
  (Intel Xeon Scalable) IA32/X64 platform.  It provides:
    - CPU interrupt state management (enable/disable/get)
    - Data cache flush (WBINVD)
    - Memory attribute management via MTRR programming + GCD services
    - CPU exception handler initialization and dispatch
    - MTRR state save/restore across GCD memory attribute operations
    - MP MTRR synchronization

  Decompiled from HR650X BIOS PE index 0056 via IDA Pro.
  Original source path:
    PurleySktPkg/Override/IA32FamilyCpuPkg/CpuArchDxe/
    Cpu.c, MtrrSync.c, MemoryAttribute.c, AutoGen.c

  Build config:
    HR6N0XMLK/DEBUG_VS2015/X64/PurleySktPkg/Override/IA32FamilyCpuPkg/
    CpuArchDxe/CpuArchDxe/DEBUG/AutoGen.c

  Key decomplied function map:
    0x300  _enable_w              -- STI (enable interrupts)
    0x370  _disable_w             -- CLI (disable interrupts)
    0x380  __getcallerseflags_w   -- PUSHFQ (read RFLAGS)
    0x390  memset_w               -- memset wrapper
    0x410  InitializeFpu          -- FINIT + CR4.OSFXSR + MXCSR init
    0x330  __rdtsc_w              -- RDTSC
    0x860  _mm_pause_w            -- PAUSE
    0x3B0  memset_buf             -- memset(buf, value, count)
    0x340  Cpuid                  -- CPUID instruction wrapper
    0x61F  CpuContextSwitch       -- Full CPU context save/switch
    0x430  CommonExceptionHandler -- common exception handler
    0x610  ExceptionStub          -- exception stub entry
    0x834  GetExceptionTemplate   -- get stub header template info
    0x858  SetIdtEntry            -- patch exception stub vector
    0x910  ReadIdtr               -- SIDT instruction
    0x920  WriteIdtr              -- LIDT instruction
    0x990  _ModuleEntryPoint      -- driver entry point
    0x9E0  DriverMainInit         -- main init sequence
    0xDB0  CpuArchDxeEntry        -- entry wrapper -> sub_13A0
    0xDA0  CpuArchDxeUnload       -- unload handler
    0x12C0 InitializeCpuArch2    -- CPU arch protocol phase 2
    0x13A0 CpuArchDxeMain        -- core init (exceptions, protocol, MTRR, cache)
    0x1880 MtrrSyncInit           -- MTRR sync allocation + registration
    0x1950 AllocateMtrrBuffer     -- AllocateZeroedPages wrapper
    0x1A60 CacheTypeToMemAttr     -- convert cache type to GCD attribute
    0x1AC0 GetMaxPhysicalAddr     -- CPUID 0x80000008 physical address size
    0x1B70 FindMemMapEntry        -- GCD map range lookup
    0x1C80 SetMemAttrViaGcd      -- set memory attributes via GCD
    0x1E40 ProgramMemAttributes   -- program all memory attributes
    0x234C UefiBootServicesLibCtor -- gImageHandle/gST/gBS init
    0x23C8 UefiRuntimeServicesLibCtor -- gRT init
    0x243C GetMtrrDefaultType     -- read MTRR_DEF_TYPE MSR 0x2FF
    0x2460 WriteBackInvalidate    -- WBINVD based on cache type
    0x2480 BitFieldRead           -- bit field read (32-bit)
    0x24FC BitFieldRead64         -- bit field read (64-bit)
    0x2580 BitFieldOr64           -- bit field OR (64-bit)
    0x260C BitFieldAnd64          -- bit field AND (64-bit)
    0x268C DivU64x64Remainder     -- 64/64 division with remainder
    0x2704 GetPowerOfTwo64        -- highest set bit -> power of 2
    0x2710 BitFieldReadU64        -- BitFieldRead64 wrapper
    0x2754 RShiftU64              -- logical right shift 64-bit
    0x2818 DebugPrint             -- debug print wrapper
    0x2860 DebugAssert            -- debug assert wrapper
    0x28A0 ReportStatusCodeEnable -- debug level check
    0x28A4 AssertLevelCheck       -- assert level check
    0x296C GetPcdProtocol         -- locate PCD protocol
    0x29F8 RegisterMtrrNotify     -- register protocol notify for MTRR
    0x2A28 AllocatePool           -- gBS->AllocatePool wrapper
    0x2A58 FreePool               -- gBS->FreePool wrapper
    0x2A9C UefiLibConstructor     -- dummy constructor
    0x2AA0 EfiGetSystemConfigTable -- locate config table by GUID
    0x2B64 DxeHobLibConstructor   -- get HOB list
    0x2BE8 DxeHobLibCtor          -- HOB lib init wrapper
    0x2BF8 InitializePciExpress   -- get PCIE base address
    0x2C18 PciExpressLibAddr      -- PCIE config address translation
    0x2C54 ResetColdSystem        -- CMOS-based reset + delay
    0x2DF8 IsX2Apic               -- check if x2APIC enabled
    0x2E3C GetApicBase            -- read APIC base MSR
    0x2E78 ReadLocalApicReg       -- read local APIC register
    0x2ECC WriteLocalApicReg      -- write local APIC register
    0x2F30 GetApicMode            -- detect xAPIC vs x2APIC
    0x2F8C GetApicId              -- get local APIC ID
    0x3048 ProgramCpuFeatures     -- program MSR features
    0x30B0 GetVariableMtrrCnt     -- read MSR 0xFE MTRR cap count
    0x30EC GetCpuCount            -- get variable MTRR count offset
    0x311C GetMtrrDefTypeFlags    -- read MTRR_DEF_TYPE MSR 0x2FF
    0x3220 ReadAllVariableMtrrs   -- read variable MTRR pairs
    0x32C4 BuildMtrrDescriptor   -- build MTRR descriptor table
    0x3374 GetMtrrMemoryAttr      -- get memory attribute for range
    0x4868 MtrrGetAllMtrrs        -- read all (fixed+variable) MTRRs
    0x4984 MtrrSupported          -- check MTRR support via CPUID+MSR
    0x4A00 DxeServicesLibCtor     -- locate DXE services table
    0x4A88 InitExceptionHandlers  -- main exception handler init
    0x4D3C DispatchException      -- exception context dispatch
    0x4FA4 SetupIdtTable          -- install IDT entries
    0x5094 DbgPrint               -- serial port debug print
    0x5168 DumpExceptionContext   -- dump all regs on exception
    0x5490 DumpImageInfo          -- find + dump faulting image
    0x55DC SaveGlobalServices     -- save gST/gBS/gRT
    0x568C MpSyncCall             -- MP sync callback
    0x5778 strlen                 -- string length
    0x5900 CopyMem                -- memcpy wrapper
    0x5A04 IoWrite16             -- 16-bit IO port write
    0x5A44 IoRead32              -- 32-bit IO port read
    0x6A70 AcquireSpinLock        -- acquire spin lock
    0x6AA4 TryAcquireSpinLock     -- try acquire spin lock
    0x6B20 ReleaseSpinLock        -- release spin lock

  SHA256: 82d5685e5287951a1cca2f25f81ad0b53a7235e3d03c9f6ba2372b5d1deb7d11
**/

#include "CpuArchDxe.h"

/*==========================================================================
 * Global pointer tables (populated by ProcessLibraryConstructorList)
 *==========================================================================*/

/// NOTE: the pointer-like globals below are stored as raw 64-bit integer
/// addresses, not typed pointers.  Code in this file reaches table fields by
/// adding byte offsets to the value (for example gST + 104, gBS + 40).

/// gImageHandle -- The EFI image handle for this driver.
///   Assigned from the ImageHandle argument of
///   UefiBootServicesTableLibConstructor.
volatile UINT64  gImageHandle  = 0;           // qword_8E00

/// gST -- Pointer to EFI System Table.
///   Read by EfiGetSystemConfigurationTable at byte offsets 104 and 112.
volatile UINT64  gST           = 0;           // qword_8DF0

/// gBS -- Pointer to EFI Boot Services Table.
///   Used by the allocation and stall helpers to fetch service entry points.
volatile UINT64  gBS           = 0;           // qword_8DF8

/// gRT -- Pointer to EFI Runtime Services Table.
volatile UINT64  gRT           = 0;           // qword_8E08

/// gDS -- Pointer to DXE Services Table (located by GUID in .data).
volatile UINT64  gDS           = 0;           // qword_8E30

/// gPcdDb -- PCD database pointer (located by GUID).
volatile UINT64  gPcdDb        = 0;           // qword_8E18

/// gHobList -- HOB list pointer (from DxeHobLib).
volatile UINT64  gHobList      = 0;           // qword_8E20

/// gMmPciBase -- PCI Express MMIO base address.
volatile UINT64  gMmPciBase    = 0;           // qword_8E28

/// gPhysicalMax -- Maximum physical address (page-aligned, CPUID-based).
///   Written by GetMaxPhysicalAddress as gPhysicalMaxRaw with the low
///   12 bits cleared.
volatile UINT64  gPhysicalMax  = 0;           // qword_8EA0

/// gPhysicalMaxRaw -- Raw maximum physical address.
///   Written by GetMaxPhysicalAddress; 0xFFFFFFFFF when CPUID leaf
///   0x80000008 is not available.
volatile UINT64  gPhysicalMaxRaw = 0;         // qword_8EA8

/// gMtrrBuffer -- Allocated MTRR sync/state buffer (608 bytes).
///   608 bytes equals 76 UINT64 slots, the size MtrrGetAllMtrrs fills.
volatile UINT64  gMtrrBuffer   = 0;           // qword_8EB0

/// gMtrrSyncLock -- Non-zero during MTRR sync.
volatile UINT8   gMtrrSyncLock = 0;           // byte_8EB8

/// gReservedVectors -- pointer to reserved exception vector table (22528 B).
volatile UINT64  gReservedVectors = 0;        // qword_8E90

/// gExternalIntHandler -- pointer to external interrupt handler table.
volatile UINT64  gExternalIntHandler = 0;     // qword_8E98

/// gVariableMtrrCountOffset -- difference: total - available variable MTRRs.
///   Subtracted from the hardware count in GetEffectiveVariableMtrrCount.
volatile UINT32  gVariableMtrrCountOffset = 0;// dword_8DE0

/*==========================================================================
 * Built-in data tables (.rdata / .data)
 *==========================================================================*/

/// dword_8D20[33] -- Fixed MTRR range geometry: 11 entries x 3 DWORDs.
///   Each entry: [Reserved, BaseAddress, StepSize]
///   Used in sub_1E40 to iterate through fixed MTRR ranges,
///   merging adjacent ranges with the same memory type.


/// dword_79D8[] -- Bit test table for CPU context switch indexing.
///   Used by sub_61F to determine the correct save slot index.

/// off_8CD0[] -- Function pointer table for CPU Arch Protocol interface.
///   Registered during InstallMultipleProtocolInterfaces.

/*==========================================================================
 * CPUID wrapper (sub_340 at 0x340)
 *==========================================================================*/

/**
  Execute the CPUID instruction with the given leaf.

  Only EAX is loaded before CPUID executes; ECX (the sub-leaf selector) is
  left with whatever value it already holds.
  NOTE(review): leaves that take a sub-leaf index therefore see an
  unspecified ECX -- confirm no caller depends on one.

  @param[in]  Leaf   CPUID input leaf (EAX)
  @param[out] Eax    CPUID EAX result (optional, skipped when NULL)
  @param[out] Ebx    CPUID EBX result (optional, skipped when NULL)
  @param[out] Ecx    CPUID ECX result (optional, skipped when NULL)
  @param[out] Edx    CPUID EDX result (optional, skipped when NULL)

  @return The Leaf argument, unchanged (not a CPUID output register).
**/
UINT64
Cpuid (
  UINT32   Leaf,
  UINT32  *Eax,
  UINT32  *Ebx,
  UINT32  *Ecx,
  UINT32  *Edx
  )
{
  // All four result registers are captured, even if the caller asked for
  // only some of them.
  UINT32 RegEax, RegEbx, RegEcx, RegEdx;

  __asm {
    mov     eax, Leaf
    cpuid
    mov     RegEax, eax
    mov     RegEbx, ebx
    mov     RegEcx, ecx
    mov     RegEdx, edx
  }

  // Each output pointer is optional; only non-NULL ones are written.
  if (Eax) *Eax = RegEax;
  if (Ebx) *Ebx = RegEbx;
  if (Ecx) *Ecx = RegEcx;
  if (Edx) *Edx = RegEdx;

  return Leaf;
}

/*==========================================================================
 * TSC / Delay / HLT / PAUSE helpers
 *==========================================================================*/

/**
  Read Time-Stamp Counter (RDTSC).

  RDTSC returns the counter split across EDX:EAX; the two halves are merged
  into one 64-bit value (EDX shifted into the upper 32 bits, OR-ed with EAX).

  @return 64-bit TSC value.
**/
UINT64
ReadTsc (
  VOID
  )
{
  UINT64 Tsc;
  __asm {
    rdtsc
    shl     rdx, 32
    or      rax, rdx
    mov     Tsc, rax
  }
  return Tsc;
}

/**
  Enable interrupts (STI).

  Executes a single STI instruction.  Takes no arguments, returns nothing,
  and does not report the previous interrupt state.
**/
VOID
EnableInterrupts (
  VOID
  )
{
  __asm { sti }
}

/**
  Disable interrupts (CLI).

  Executes a single CLI instruction.  Takes no arguments, returns nothing,
  and does not report the previous interrupt state.
**/
VOID
DisableInterrupts (
  VOID
  )
{
  __asm { cli }
}

/**
  Read RFLAGS register (PUSHFQ then POP).

  RFLAGS is read by pushing it onto the stack and popping it into RAX, then
  copying RAX into the local that is returned.

  @return Current RFLAGS value.
**/
UINT64
GetRflags (
  VOID
  )
{
  UINT64 Rflags;
  __asm {
    pushfq
    pop    rax
    mov    Rflags, rax
  }
  return Rflags;
}

/**
  Execute HLT instruction (halt CPU until next interrupt).

  Executes a single HLT and returns once the instruction completes.
**/
VOID
CpuHlt (
  VOID
  )
{
  __asm { hlt }
}

/**
  Execute PAUSE instruction (hint for spin-wait loops).

  Executes a single PAUSE.  Takes no arguments and returns nothing.
**/
VOID
CpuPause (
  VOID
  )
{
  __asm { pause }
}

/*==========================================================================
 * Segment register / IDTR / GDTR helpers
 *==========================================================================*/

/**
  Read Task Register (STR instruction).

  The selector is read into AX and stored through TaskRegister.  The pointer
  is dereferenced without a NULL check.

  @param[out] TaskRegister  Receives the 16-bit selector value.
**/
VOID
ReadStr (
  UINT16 *TaskRegister
  )
{
  __asm {
    str    ax
    mov    rcx, TaskRegister
    mov    [rcx], ax
  }
}

/**
  Read Local Descriptor Table Register (SLDT instruction).

  The selector is read into AX and stored through LocalDescTable.  The
  pointer is dereferenced without a NULL check.

  @param[out] LocalDescTable  Receives the 16-bit selector value.
**/
VOID
ReadSldt (
  UINT16 *LocalDescTable
  )
{
  __asm {
    sldt   ax
    mov    rcx, LocalDescTable
    mov    [rcx], ax
  }
}

/**
  Read IDTR (SIDT instruction).

  In 64-bit mode SIDT stores a 10-byte pseudo-descriptor: a 16-bit limit
  followed by a 64-bit base.  The image is captured in a local buffer and
  then split into the two outputs, so nothing is written past the UINT16
  that Limit points to and Base is actually filled in.

  @param[out] Limit  Receives the IDT limit (size-1).  Skipped when NULL.
  @param[out] Base   Receives the IDT base address.    Skipped when NULL.
**/
VOID
ReadIdtr (
  UINT16 *Limit,
  UINT64 *Base
  )
{
  // 10-byte descriptor image: [0] = limit, [1..4] = base, little-endian.
  UINT16  Image[5];

  __asm {
    lea    rcx, Image
    sidt   fword ptr [rcx]
  }

  if (Limit)
    *Limit = Image[0];

  if (Base)
    *Base = (UINT64)Image[1]
          | ((UINT64)Image[2] << 16)
          | ((UINT64)Image[3] << 32)
          | ((UINT64)Image[4] << 48);
}

/**
  Load IDTR (LIDT instruction).

  LIDT takes a memory operand holding a 10-byte pseudo-descriptor (16-bit
  limit followed by 64-bit base).  The descriptor is assembled in a local
  buffer from the two by-value arguments and LIDT is pointed at that buffer,
  rather than treating an argument register as an address.

  @param[in] Limit  IDT limit (size-1).
  @param[in] Base   IDT base address.
**/
VOID
WriteIdtr (
  UINT16  Limit,
  UINT64  Base
  )
{
  // 10-byte descriptor image: [0] = limit, [1..4] = base, little-endian.
  UINT16  Image[5];

  Image[0] = Limit;
  Image[1] = (UINT16)Base;
  Image[2] = (UINT16)(Base >> 16);
  Image[3] = (UINT16)(Base >> 32);
  Image[4] = (UINT16)(Base >> 48);

  __asm {
    lea    rcx, Image
    lidt   fword ptr [rcx]
  }
}

/**
  Read GDTR (SGDT instruction).

  In 64-bit mode SGDT stores a 10-byte pseudo-descriptor: a 16-bit limit
  followed by a 64-bit base.  The image is captured in a local buffer and
  then split into the two outputs, so nothing is written past the UINT16
  that Limit points to and Base is actually filled in.

  @param[out] Limit  Receives the GDT limit.         Skipped when NULL.
  @param[out] Base   Receives the GDT base address.  Skipped when NULL.
**/
VOID
ReadGdtr (
  UINT16 *Limit,
  UINT64 *Base
  )
{
  // 10-byte descriptor image: [0] = limit, [1..4] = base, little-endian.
  UINT16  Image[5];

  __asm {
    lea    rcx, Image
    sgdt   fword ptr [rcx]
  }

  if (Limit)
    *Limit = Image[0];

  if (Base)
    *Base = (UINT64)Image[1]
          | ((UINT64)Image[2] << 16)
          | ((UINT64)Image[3] << 32)
          | ((UINT64)Image[4] << 48);
}

/*==========================================================================
 * MSR Read/Write
 *==========================================================================*/

/**
  Read an MSR (RDMSR instruction).

  RDMSR returns the value split across EDX:EAX; the two halves are merged
  into one 64-bit value (EDX shifted into the upper 32 bits, OR-ed with EAX).
  The MSR index is not validated here.

  @param[in]  Msr     MSR address (ECX).

  @return 64-bit MSR value.
**/
UINT64
ReadMsr (
  UINT32 Msr
  )
{
  UINT64 Value;

  __asm {
    mov     ecx, Msr
    rdmsr
    shl     rdx, 32
    or      rax, rdx
    mov     Value, rax
  }

  return Value;
}

/**
  Write an MSR (WRMSR instruction).

  WRMSR takes the value in EDX:EAX.  The 64-bit argument is split into its
  two 32-bit halves in C first, because an assembler operand cannot be a C
  shift expression and a 64-bit variable cannot be moved into a 32-bit
  register directly.

  @param[in]  Msr     MSR address (ECX).
  @param[in]  Value   64-bit value to write.
**/
VOID
WriteMsr (
  UINT32     Msr,
  UINT64     Value
  )
{
  UINT32  Low;
  UINT32  High;

  Low  = (UINT32)Value;
  High = (UINT32)(Value >> 32);

  __asm {
    mov     ecx, Msr
    mov     eax, Low
    mov     edx, High
    wrmsr
  }
}

/*==========================================================================
 * IO Port access (from BaseIoLibIntrinsic)
 *==========================================================================*/

/**
  Write a 16-bit value to an IO port (OUT DX, AX).

  @param[in]  Port    IO port address.
  @param[in]  Value   Value to write.

  @return The Value argument, unchanged.
**/
UINT16
IoWrite16 (
  UINT16  Port,
  UINT16  Value
  )
{
  __asm {
    mov     dx, Port
    mov     ax, Value
    out     dx, ax
  }
  return Value;
}

/**
  Read 32-bit from IO port (IN EAX, DX).

  The alignment requirement below is documented only; this function does not
  check it.

  @param[in]  Port    IO port address (must be 4-byte aligned).

  @return 32-bit value read.
**/
UINT32
IoRead32 (
  UINT16  Port
  )
{
  UINT32 Value;
  __asm {
    mov     dx, Port
    in      eax, dx
    mov     Value, eax
  }
  return Value;
}

/*==========================================================================
 * Memset wrapper (sub_3B0 at 0x3B0, sub_390 at 0x390)
 *==========================================================================*/

/**
  Byte-fill a buffer.

  Note the argument order: the count comes before the fill value, unlike the
  C library memset.

  @param[in]  buf     Destination buffer.
  @param[in]  count   Number of bytes to fill.
  @param[in]  value   Fill byte.

  @return The buf pointer, unchanged.
**/
VOID *
Memset (
  VOID         *buf,
  UINT64        count,
  UINT8         value
  )
{
  UINT8   *Cursor;
  UINT64   Remaining;

  Cursor = (UINT8 *)buf;

  // Walk the buffer one byte at a time until the requested count is used up.
  for (Remaining = count; Remaining != 0; Remaining--) {
    *Cursor = value;
    Cursor++;
  }

  return buf;
}

/**
  Zero memory (Zeromem equivalent).  Fills buffer with 0.

  Thin wrapper: forwards to Memset with a fill byte of 0.

  @param[in]  buf     Destination buffer.
  @param[in]  count   Number of bytes to zero.

  @return The buf pointer, as returned by Memset.
**/
VOID *
ZeroMemBuf (
  VOID         *buf,
  UINT64        count
  )
{
  return Memset (buf, count, 0);
}

/*==========================================================================
 * Bit field operations (BaseLib)
 *==========================================================================*/

/**
  Read bit field from 64-bit value, bits StartBit..EndBit (inclusive).
  (sub_2710 at 0x2710)

  The field mask is built by right-shifting an all-ones value, which keeps
  every shift count in 0..63.  Building it as (1 << width) - 1 would shift by
  64 -- undefined in C -- when the field spans the whole value (0..63).

  The caller must pass StartBit <= EndBit <= 63; this is not checked.

  @param[in]  Value      Input value.
  @param[in]  StartBit   Starting bit position (0-63).
  @param[in]  EndBit     Ending bit position (0-63, >= StartBit).

  @return Extracted bit field (right-shifted to bit 0).
**/
UINT64
BitFieldRead64 (
  UINT64  Value,
  UINT32  StartBit,
  UINT32  EndBit
  )
{
  UINT64  FieldMask;

  // Width-1 ranges over 0..63, so 63 - (width-1) is a valid shift count.
  FieldMask = ~0ULL >> (63 - (EndBit - StartBit));

  return (Value >> StartBit) & FieldMask;
}

/**
  Logical shift right 64-bit value (RShiftU64).
  (sub_2754 at 0x2754)

  Value is unsigned, so vacated high bits are zero-filled.  The shift count
  is not range-checked; a count of 64 or more is undefined in C.

  @param[in]  Value   Value to shift.
  @param[in]  Shift   Number of bits to shift (0-63).

  @return Shifted value.
**/
UINT64
RShiftU64 (
  UINT64  Value,
  UINT32  Shift
  )
{
  return Value >> Shift;
}

/**
  Logical shift left 64-bit value (LShiftU64).

  Bits shifted past bit 63 are discarded.  The shift count is not
  range-checked; a count of 64 or more is undefined in C.

  @param[in]  Value   Value to shift.
  @param[in]  Shift   Number of bits to shift (0-63).

  @return Shifted value.
**/
UINT64
LShiftU64 (
  UINT64  Value,
  UINT32  Shift
  )
{
  return Value << Shift;
}

/**
  Multiply 64-bit value by 32-bit multiplier.

  The multiplication is performed in unsigned 64-bit arithmetic, so a
  product that does not fit in 64 bits wraps around.

  @param[in]  Value       64-bit multiplicand.
  @param[in]  Multiplier  32-bit multiplier.

  @return Product (low 64 bits).
**/
UINT64
MultU64x32 (
  UINT64  Value,
  UINT32  Multiplier
  )
{
  return Value * Multiplier;
}

/**
  Divide 64-bit value by 32-bit divisor.

  The divisor is not checked; the caller must not pass 0.

  @param[in]  Dividend  64-bit dividend.
  @param[in]  Divisor   32-bit divisor (must be non-zero).

  @return Quotient.
**/
UINT64
DivU64x32 (
  UINT64  Dividend,
  UINT32  Divisor
  )
{
  return Dividend / Divisor;
}

/**
  Get Power of Two for 64-bit value.  Finds the highest set bit
  and returns the corresponding power of two.

  A zero input has no set bit, so zero is returned; returning 1 would break
  the "result <= Value" contract.

  @param[in]  Value   Input value.

  @return Highest power of two <= Value, or 0 when Value is 0.
**/
UINT64
GetPowerOfTwo64 (
  UINT64  Value
  )
{
  UINT64 Result;

  if (Value == 0)
    return 0;

  // Double Result once for every bit position below the highest set bit.
  Result = 1;
  while ((Value >>= 1) != 0)
    Result <<= 1;

  return Result;
}

/**
  Unsigned 64-by-64 division that can also report the remainder.

  The divisor is not checked; the caller must not pass 0.

  @param[in]  Dividend    64-bit dividend.
  @param[in]  Divisor     64-bit divisor (must be non-zero).
  @param[out] Remainder   Receives Dividend % Divisor; may be NULL to skip.

  @return Quotient.
**/
UINT64
DivU64x64Remainder (
  UINT64  Dividend,
  UINT64  Divisor,
  UINT64 *Remainder
  )
{
  // Caller does not want the remainder: only the quotient is computed.
  if (!Remainder) {
    return Dividend / Divisor;
  }

  *Remainder = Dividend % Divisor;

  return Dividend / Divisor;
}

/**
  Pull bits StartBit..EndBit (inclusive) out of a 32-bit value.
  (sub_2480 at 0x2480, from BaseLib BitField.c)

  The mask is computed in 64 bits, so a full-width field (0..31) is handled.
  The bit positions are not range-checked.

  @param[in]  Value      Input 32-bit value.
  @param[in]  StartBit   Starting bit position (0-31).
  @param[in]  EndBit     Ending bit position (0-31, >= StartBit).

  @return Bit field value (right-shifted to bit 0), widened to 64 bits.
**/
UINT64
BitFieldRead32 (
  UINT32  Value,
  UINT32  StartBit,
  UINT32  EndBit
  )
{
  UINT32  Width;
  UINT32  Shifted;
  UINT64  FieldMask;

  Width     = EndBit - StartBit + 1;
  FieldMask = (1ULL << Width) - 1;
  Shifted   = Value >> StartBit;

  return Shifted & FieldMask;
}

/**
  Bit field OR into 64-bit value (BitFieldOr64).
  NOTE(review): the function map in the file header lists BitFieldOr64 at
  0x2580 and BitFieldRead64 at 0x24FC -- confirm which sub_ this body
  corresponds to.

  OrValue is shifted up to StartBit, clipped to the StartBit..EndBit window,
  and OR-ed into Value.  Bits of Value outside the window are never changed,
  and bits of OrValue that do not fit in the field are dropped.

  The caller must pass StartBit <= EndBit <= 63; this is not checked.

  @param[in]  Value      Input value.
  @param[in]  StartBit   Starting bit position (0-63).
  @param[in]  EndBit     Ending bit position (0-63, >= StartBit).
  @param[in]  OrValue    Value to OR into the field (aligned to bit 0).

  @return Result with field OR'd.
**/
UINT64
BitFieldOr64 (
  UINT64  Value,
  UINT32  StartBit,
  UINT32  EndBit,
  UINT64  OrValue
  )
{
  UINT64  FieldMask;

  // All-ones shifted right keeps the shift count in 0..63 even for a
  // full-width field, then the mask is moved into position at StartBit.
  FieldMask = (~0ULL >> (63 - (EndBit - StartBit))) << StartBit;

  return Value | ((OrValue << StartBit) & FieldMask);
}

/*==========================================================================
 * FPU/SSE Init (sub_410 at 0x410)
 *==========================================================================*/

/**
  Initialize FPU and SSE state.
    - Execute FINIT
    - Set CR4.OSFXSR bit (bit 9) for SSE enable
    - Set MXCSR to 0x1F80 (all exceptions masked, round-to-nearest)

  @return New CR4 value.
**/
UINT64
InitializeFpuSse (
  VOID
  )
{
  UINT64 Cr4;

  __asm { finit }

  Cr4 = __readcr4 ();
  Cr4 |= 0x200;           // CR4.OSFXSR
  __writecr4 (Cr4);

  __asm {
    ldmxcsr dword ptr [Cr4_2]
    Cr4_2: .long 0x1F80
  }

  return Cr4;
}

/*==========================================================================
 * Debug Library Helpers
 *==========================================================================*/

/**
  Check if reporting status codes / debug is enabled.
  (sub_28A0 at 0x28A0)

  Hard-wired: this reconstruction always reports debug as enabled.

  @return Always 1.
**/
UINT8
DebugEnabled (
  VOID
  )
{
  return 1;
}

/**
  Report whether an assert with the given level mask should fire.
  (sub_28A4 at 0x28A4)

  Any non-zero mask counts as enabled.

  @param[in]  AssertLevel  Assert level mask.

  @return 1 when AssertLevel is non-zero, 0 otherwise.
**/
UINT8
AssertEnabled (
  UINT32  AssertLevel
  )
{
  if (AssertLevel == 0) {
    return 0;
  }

  return 1;
}

/**
  DEBUG print handler.
  (sub_2818 at 0x2818)

  Stub: the variable argument list is opened and immediately closed, and
  nothing is formatted or emitted.  ErrorLevel and Format are not used.

  @param[in]  ErrorLevel  Debug error level (ignored).
  @param[in]  Format      Print format string (ignored).
  @param[in]  ...         Variable arguments (ignored).
**/
VOID
DebugPrint (
  UINT32       ErrorLevel,
  const CHAR8 *Format,
  ...
  )
{
  VA_LIST         Args;
  VA_START (Args, Format);
  VA_END (Args);
}

/**
  ASSERT handler (DebugAssert).
  (sub_2860 at 0x2860)

  Never returns: the body is an unconditional infinite loop.  None of the
  arguments are used, so no diagnostic is emitted before the hang.

  @param[in]  FileName     Source file name string (ignored).
  @param[in]  LineNumber   Source line number (ignored).
  @param[in]  Description  Assert condition description (ignored).
**/
VOID
DebugAssert (
  const CHAR8  *FileName,
  UINT32        LineNumber,
  const CHAR8  *Description
  )
{
  // Dead loop: execution stops here.
  while (1) {}
}

/*==========================================================================
 * EfiGetSystemConfigurationTable (sub_2AA0 at 0x2AA0)
 *==========================================================================*/

/**
  Locate a system configuration table by GUID.

  Walks the configuration table array referenced by the system table and
  compares each 24-byte entry's 16-byte GUID with the one TableGuid points
  to.  The GUID is addressed by byte offset from TableGuid itself; TableGuid
  is never dereferenced as a pointer-to-pointer and offsets are not scaled
  by the pointee size.

  Both argument checks go through DebugAssert, which does not return.

  @param[in]  TableGuid   Address of the 16-byte GUID to look for.
  @param[out] Table       Receives the matching entry's table pointer; set
                          to 0 when no entry matches.

  @return 0 (EFI_SUCCESS) if found, 0x800000000000000E (EFI_NOT_FOUND)
          otherwise.
**/
UINT64
EfiGetSystemConfigurationTable (
  UINT64  **TableGuid,
  UINT64  ***Table
  )
{
  UINT64  i;
  UINT64  NumberOfEntries;
  UINT64  ConfigTable;
  UINT64  Guid;
  UINT64  Entry;

  if (!TableGuid)
    DebugAssert (__FILE__, __LINE__, "TableGuid != ((void *) 0)");

  if (!Table)
    DebugAssert (__FILE__, __LINE__, "Table != ((void *) 0)");

  *Table = 0;

  // gST->NumberOfTableEntries is at offset 104 in EFI_SYSTEM_TABLE
  NumberOfEntries = *(UINT64 *)(gST + 104);
  if (NumberOfEntries == 0)
    return 0x800000000000000EULL;  // EFI_NOT_FOUND

  // gST->ConfigurationTable is at offset 112
  ConfigTable = *(UINT64 *)(gST + 112);

  // Use the GUID's address as an integer so every offset below is in bytes.
  Guid = (UINT64)TableGuid;

  for (i = 0; i < NumberOfEntries; i++) {
    // Each entry is 24 bytes: 16-byte GUID followed by an 8-byte pointer.
    Entry = ConfigTable + 24 * i;

    if (*(UINT32 *)(Guid + 0)  == *(UINT32 *)(Entry + 0) &&
        *(UINT16 *)(Guid + 4)  == *(UINT16 *)(Entry + 4) &&
        *(UINT16 *)(Guid + 6)  == *(UINT16 *)(Entry + 6) &&
        *(UINT32 *)(Guid + 8)  == *(UINT32 *)(Entry + 8) &&
        *(UINT32 *)(Guid + 12) == *(UINT32 *)(Entry + 12)) {
      *Table = (UINT64 **)*(UINT64 *)(Entry + 16);
      return 0;
    }
  }

  return 0x800000000000000EULL;
}

/*==========================================================================
 * Memory allocation helpers
 *==========================================================================*/

/**
  Allocate pages through gBS->AllocatePages and zero fill them.
  (sub_1950 at 0x1950)

  Despite its name, the Pages argument is a size in bytes: it is rounded up
  to whole 4 KB pages before the call.

  The call uses allocation Type 1 and MemoryType 10.  In the UEFI
  enumerations these are AllocateMaxAddress and EfiACPIMemoryNVS.  With
  AllocateMaxAddress the firmware reads *Memory on input as the highest
  acceptable address, so Memory must be initialized before the call.
  NOTE(review): the 4 GB ceiling used here follows the common
  "ACPI NVS below 4G" allocation pattern -- confirm against the binary.

  @param[in]  Pages   Requested size in bytes (rounded up to whole pages).

  @return Physical address of allocated pages, or 0 on failure.
**/
UINT64
AllocateZeroedPages (
  UINT64  Pages
  )
{
  UINT64  Status;
  UINT64  Memory;
  UINT64  PageCount;

  // Round the byte size up to a whole number of 4 KB pages.
  PageCount = ((Pages & 0xFFF) != 0) + (Pages >> 12);

  // Input for AllocateMaxAddress: highest address the block may occupy.
  Memory = 0xFFFFFFFFULL;

  // gBS->AllocatePages (Type = 1 = AllocateMaxAddress)
  // MemoryType = 10 = EfiACPIMemoryNVS
  // Offset 40 in gBS vtable = AllocatePages
  Status = ((UINT64 (__fastcall *)(UINT64, UINT64, UINT64, UINT64 *))
            (*(UINT64 *)(gBS + 40)))(1, 10, PageCount, &Memory);

  if (DebugEnabled () && Status)
    DebugAssert (__FILE__, __LINE__, "!EFI_ERROR (Status)");

  // Never zero memory that was not handed out.
  if (Status)
    return 0;

  ZeroMemBuf ((VOID *)Memory, PageCount << 12);
  return Memory;
}

/**
  Allocate pool memory of given type and size.
  (sub_2A28 at 0x2A28)

  Calls gBS->AllocatePool.  On X64 the 24-byte table header is followed by
  RaiseTPL (24), RestoreTPL (32), AllocatePages (40), FreePages (48),
  GetMemoryMap (56), AllocatePool (64) and FreePool (72).

  @param[in]  PoolType   EFI_MEMORY_TYPE value (e.g. 4 = EfiBootServicesData,
                         6 = EfiRuntimeServicesData).
  @param[in]  Size       Size in bytes.

  @return Pointer to allocated buffer, or 0 on failure.
**/
UINT64
AllocatePool (
  UINT64  PoolType,
  UINT64  Size
  )
{
  UINT64  Buffer;
  UINT64  Status;

  Buffer = 0;

  // gBS->AllocatePool at offset 64
  Status = ((UINT64 (__fastcall *)(UINT64, UINT64, UINT64 *))
            (*(UINT64 *)(gBS + 64)))(PoolType, Size, &Buffer);
  if (Status)
    return 0;

  return Buffer;
}

/**
  Free pool memory.
  (sub_2A58 at 0x2A58)

  Calls gBS->FreePool, which on X64 sits at byte offset 72 of the boot
  services table (immediately after AllocatePool at 64).  The status
  returned by the service is discarded.

  @param[in]  Buffer   Pointer to buffer allocated via AllocatePool.
**/
VOID
FreePool (
  UINT64  Buffer
  )
{
  // gBS->FreePool at offset 72
  ((UINT64 (__fastcall *)(UINT64))
   (*(UINT64 *)(gBS + 72)))(Buffer);
}

/**
  Fill a region of memory with a repeated 32-bit value.

  Only whole 32-bit words are written: Length is divided by four and any
  trailing 1-3 bytes are left untouched.

  @param[in]  Buffer   Pointer to buffer to fill.
  @param[in]  Length   Length of buffer in bytes.
  @param[in]  Value    32-bit value to fill.
**/
VOID
SetMem32 (
  VOID    *Buffer,
  UINT64   Length,
  UINT32   Value
  )
{
  UINT32 *Cursor;
  UINT64  WordsLeft;

  Cursor    = (UINT32 *)Buffer;
  WordsLeft = Length / sizeof (UINT32);

  while (WordsLeft != 0) {
    *Cursor = Value;
    Cursor++;
    WordsLeft--;
  }
}

/**
  Copy a run of bytes from Source to Destination.

  Bytes are copied one at a time from the lowest address upward.  Nothing
  here special-cases overlapping regions, so a Destination that starts
  inside the source range will observe already-copied bytes.

  @param[out] Destination  Destination buffer.
  @param[in]  Source       Source buffer.
  @param[in]  Length       Number of bytes to copy.
**/
VOID
CopyMem (
  VOID         *Destination,
  const VOID   *Source,
  UINT64        Length
  )
{
  UINT8        *Out;
  const UINT8  *In;
  UINT64        Remaining;

  Out = (UINT8 *)Destination;
  In  = (const UINT8 *)Source;

  for (Remaining = Length; Remaining != 0; Remaining--) {
    *Out = *In;
    Out++;
    In++;
  }
}

/**
  Clear a memory region to zero, one byte at a time.

  @param[out] Buffer    Buffer to zero.
  @param[in]  Length    Length in bytes.
**/
VOID
ZeroMem (
  VOID    *Buffer,
  UINT64   Length
  )
{
  UINT8  *Cursor;
  UINT8  *End;

  Cursor = (UINT8 *)Buffer;
  End    = Cursor + Length;

  while (Cursor != End) {
    *Cursor = 0;
    Cursor++;
  }
}

/**
  Microsecond stall via gBS->Stall.

  On X64 the Stall entry sits at byte offset 248 (0xF8) of the boot services
  table, directly after GetNextMonotonicCount (240).  An offset that is not
  a multiple of 8 cannot address any entry in the table.  The status
  returned by the service is discarded.

  @param[in]  Microseconds  Number of microseconds to stall.
**/
VOID
Stall (
  UINT32  Microseconds
  )
{
  // gBS->Stall at offset 248
  ((UINT64 (__fastcall *)(UINT32))
   (*(UINT64 *)(gBS + 248)))(Microseconds);
}

/*==========================================================================
 * Physical Address Size Detection (sub_1AC0 at 0x1AC0)
 *==========================================================================*/

/**
  Get maximum physical address from CPUID leaf 0x80000008.
  Falls back to 36-bit (64GB) if CPUID does not support extended functions.

  The physical address width is taken from the low byte of EAX returned by
  leaf 0x80000008.  Both gPhysicalMaxRaw (all address bits set) and
  gPhysicalMax (same value with the low 12 bits cleared) are updated.

  @return Maximum physical address (page-aligned, bit mask).
**/
UINT64
GetMaxPhysicalAddress (
  VOID
  )
{
  UINT32 AddressSize;
  UINT32 Regs[4];
  UINT64 RawMax;

  // CPUID 0x80000000 to check max extended leaf
  Cpuid (0x80000000, &Regs[0], &Regs[1], &Regs[2], &Regs[3]);

  if (Regs[0] < 0x80000008) {
    RawMax = 0xFFFFFFFFFULL;             // 36-bit, 64 GB
  } else {
    Cpuid (0x80000008, &Regs[0], &Regs[1], &Regs[2], &Regs[3]);

    // EAX[7:0] = number of physical address bits.
    AddressSize = Regs[0] & 0xFF;

    // A shift by 64 or more is undefined; saturate instead.
    if (AddressSize >= 64)
      RawMax = ~0ULL;
    else
      RawMax = (1ULL << AddressSize) - 1;
  }

  gPhysicalMaxRaw = RawMax;
  gPhysicalMax    = RawMax & 0xFFFFFFFFFFFFF000ULL;

  return RawMax & 0xFFFFFFFFFFFFF000ULL;
}

/*==========================================================================
 * MTRR Support Detection (sub_4984 at 0x4984)
 *==========================================================================*/

/**
  Check if MTRRs are supported on this CPU.
  Tests CPUID.01H:EDX[12] (MTRR), then checks MTRRCAP MSR (0xFE)
  for VCNT > 0 and FIXED support.

  The two MSR fields are extracted with BitFieldRead64; these are read-only
  tests, so no bit-field OR is involved.

  @return 1 if MTRRs are supported, 0 otherwise.
**/
UINT8
MtrrSupported (
  VOID
  )
{
  UINT32  Edx;
  UINT64  MtrrCap;

  Cpuid (1, NULL, NULL, NULL, &Edx);

  // Check CPUID.01H:EDX[12] = MTRR support
  if (!(BitFieldRead32 (Edx, 12, 12) & 1))
    return 0;

  MtrrCap = ReadMsr (0xFE);       // IA32_MTRR_CAPABILITIES

  // Check VCNT (bits 7:0) != 0
  if (BitFieldRead64 (MtrrCap, 0, 7) == 0)
    return 0;

  // Check FIXED (bit 8) == 1
  if (BitFieldRead64 (MtrrCap, 8, 8) == 0)
    return 0;

  return 1;
}

/*==========================================================================
 * Variable MTRR Count (sub_30B0 at 0x30B0)
 *==========================================================================*/

/**
  Get the number of variable MTRRs from IA32_MTRR_CAPABILITIES MSR (0xFE).

  @return Variable MTRR count (bits 7:0 of MSR 0xFE).
**/
UINT32
GetVariableMtrrCount (
  VOID
  )
{
  return (UINT32)(ReadMsr (0xFE) & 0xFF);
}

/**
  Compute the usable variable MTRR count.
  (sub_30EC at 0x30EC)

  The hardware count is reduced by gVariableMtrrCountOffset.  The result is
  0 if MTRRs are unsupported or the offset exceeds the hardware count.

  @return Effective variable MTRR count (0 if MTRR not supported).
**/
UINT32
GetEffectiveVariableMtrrCount (
  VOID
  )
{
  UINT32 HardwareCount;

  if (!MtrrSupported ()) {
    return 0;
  }

  HardwareCount = GetVariableMtrrCount ();

  // Offset larger than what the hardware offers: nothing is usable.
  if (HardwareCount < gVariableMtrrCountOffset) {
    return 0;
  }

  return HardwareCount - gVariableMtrrCountOffset;
}

/*==========================================================================
 * MTRR Default Type (sub_311C at 0x311C)
 *==========================================================================*/

/**
  Fetch the default memory type from MSR 0x2FF (IA32_MTRR_DEF_TYPE).

  Only the low three bits of the MSR are kept.  When MTRRs are not supported
  the MSR is not read and 0 is returned.

  @return Default memory type (0=UC, 1=WC, 4=WT, 5=WP, 6=WB).
**/
UINT8
GetMtrrDefaultType (
  VOID
  )
{
  UINT8  DefaultType;

  DefaultType = 0;

  if (MtrrSupported ()) {
    DefaultType = (UINT8)(ReadMsr (0x2FF) & 7);
  }

  return DefaultType;
}

/*==========================================================================
 * Read All Variable MTRRs (sub_3220 at 0x3220)
 *==========================================================================*/

/**
  Read all variable MTRR base/mask pairs from hardware or buffer.

  When MtrrBuffer is supplied it is read with the layout MtrrGetAllMtrrs
  produces: 11 fixed-MTRR slots first, then the variable pairs starting at
  slot 11 as {base, mask}, {base, mask}, ...  Pair i is therefore at slots
  11 + 2*i and 12 + 2*i.
  NOTE(review): this assumes MtrrBuffer uses that 76-slot layout -- confirm
  against the callers.

  A Count above 32 is routed to DebugAssert, which does not return.

  @param[in]  MtrrBuffer  Optional pre-saved buffer (NULL=read from MSRs).
  @param[in]  Count       Number of variable MTRR pairs.
  @param[out] VariableMtrrs  Output array [2*Count] elements {base, mask}.

  @return VariableMtrrs pointer.
**/
UINT64
ReadAllVariableMtrrs (
  UINT64  *MtrrBuffer,
  UINT32   Count,
  UINT64  *VariableMtrrs
  )
{
  UINT32  i;
  UINT64  Base;
  UINT64  Mask;

  if (Count > 32)
    DebugAssert (__FILE__, __LINE__, "VariableMtrrCount <= 32");

  for (i = 0; i < Count; i++) {
    if (MtrrBuffer) {
      // Variable pairs follow the 11 fixed-MTRR slots; mask is adjacent to
      // its base.
      Base = MtrrBuffer[11 + i * 2];
      Mask = MtrrBuffer[11 + i * 2 + 1];
    } else {
      Base = ReadMsr (0x200 + 2 * i);       // IA32_MTRR_PHYSBASEi
      Mask = ReadMsr (0x201 + 2 * i);       // IA32_MTRR_PHYSMASKi
    }
    VariableMtrrs[i * 2]     = Base;
    VariableMtrrs[i * 2 + 1] = Mask;
  }

  return (UINT64)VariableMtrrs;
}

/*==========================================================================
 * Build MTRR Descriptor Table (sub_32C4 at 0x32C4)
 *==========================================================================*/

/**
  Build an MTRR descriptor table from pre-read variable MTRR pairs.

  Scans the MTRR entries and creates a 32-byte descriptor for every pair
  whose mask has bit 11 (valid) set.  Descriptor fields are addressed by
  byte offset from the start of the output buffer; applying the offsets to a
  UINT64 pointer would scale them by eight.

  The output buffer is treated as 1024 bytes (32 descriptors).  Scanning
  stops once 32 descriptors have been produced so the buffer cannot be
  overrun.

  @param[in]  a1         MTRR pair array {base, mask} entries.
  @param[in]  a2         Number of entries to scan.
  @param[in]  a3         Physical address mask (NOT of phys address limit).
  @param[in]  a4         Physical address AND mask.
  @param[out] a5         Output descriptor buffer (1024 bytes minimum).

  @return Number of valid MTRR descriptors built (at most 32).
**/
UINT64
BuildMtrrDescriptorTable (
  UINT64  *a1,
  UINT64   a2,
  UINT64   a3,
  UINT64   a4,
  UINT64  *a5
  )
{
  UINT64   Index;
  UINT64   ValidCount;
  UINT64   Entry;

  ZeroMem ((VOID *)a5, 1024);

  ValidCount = 0;

  for (Index = 0; Index < a2 && ValidCount < 32; Index++, a1 += 2) {
    // Check MTRR_PHYS_MASK bit 11 (valid)
    if ((a1[1] & 0x800LL) == 0)
      continue;

    // Byte address of the next free 32-byte descriptor.
    Entry = (UINT64)a5 + 32 * ValidCount;

    // Descriptor layout (32 bytes each):
    // offset 0: Reserved (index)
    // offset 8: BaseAddress = a4 & base
    // offset 16: Length = (a3 & ~(a4 & mask)) + 1
    // offset 24: MemoryType = bits[7:0] of base
    // offset 28: Attributes (0x0101 = valid + MSR pair)
    *(UINT32 *)(Entry + 0)  = (UINT32)Index;
    *(UINT64 *)(Entry + 8)  = a4 & a1[0];
    *(UINT64 *)(Entry + 16) = (a3 & ~(a4 & a1[1])) + 1;
    // The 64-bit store covers bytes 24..31; the attribute store that
    // follows must come second so it is not wiped out.
    *(UINT64 *)(Entry + 24) = a1[0] & 0xFF;
    *(UINT16 *)(Entry + 28) = 0x0101;

    ValidCount++;
  }

  return ValidCount;
}

/*==========================================================================
 * MTRR Get All (sub_4868 at 0x4868, sub_3374 at 0x3374, sub_31D8 at 0x31D8)
 *==========================================================================*/

/**
  Get the memory attribute for a given address via MTRR lookup.
  (sub_3374 at 0x3374)

  NOTE: this reconstruction is a stub.  Neither argument is read and no MTRR
  is consulted; the function unconditionally returns 0 (UC).  The locals
  declared below are unused.

  Intended behavior (not implemented here): consult the fixed MTRRs, the
  variable MTRRs and the default type to determine the effective cache type
  for the address.

  @param[in]  a1      Physical address to query (currently ignored).
  @param[in]  a2      MTRR buffer with fixed/variable MTRR values
                      (currently ignored).

  @return Always 0 in this reconstruction.
**/
UINT64
MtrrGetMemoryAttribute (
  UINT64  a1,
  UINT64 *a2
  )
{
  UINT64  MtrrVarTable[64];
  UINT64  VarCount;
  UINT64  PhysAddrLimit;
  UINT64  Result;
  UINT64  i;

  // TODO: not implemented.  The MtrrLib-style algorithm would use the fixed
  // MTRRs for sub-1MB addresses, then the variable MTRRs for addresses
  // >= 1MB, then fall back to the default type.

  return 0;  // UC default
}

/**
  Snapshot the MTRR MSRs into a caller-supplied buffer.
  (sub_4868 at 0x4868)

  When MtrrSupported() reports support, the buffer is filled as follows:
    - Mtrrs[0..10]  : one MSR per 12-byte record of gFixedMtrrTable; the
                      first UINT32 of each record is used as the MSR index.
    - Mtrrs[11...]  : filled by ReadAllVariableMtrrs() for at most 32
                      variable MTRRs (presumably base/mask pairs occupying
                      slots 11..74 -- confirm against that helper).
    - Mtrrs[75]     : MSR 0x2FF.
  When MTRRs are not supported the buffer is left untouched.

  @param[out] Mtrrs   Buffer of at least 76 UINT64 entries.

  @return The Mtrrs pointer that was passed in.
**/
UINT64 *
MtrrGetAllMtrrs (
  UINT64 *Mtrrs
  )
{
  UINT32  FixedIndex;
  UINT32  VariableCount;

  if (MtrrSupported ()) {
    // Each table record is three UINT32 words; word 0 is the MSR index.
    for (FixedIndex = 0; FixedIndex < 11; FixedIndex++) {
      Mtrrs[FixedIndex] = ReadMsr (gFixedMtrrTable[3 * FixedIndex]);
    }

    // Never read more variable MTRRs than the 32 the buffer has room for.
    VariableCount = GetVariableMtrrCount ();
    VariableCount = (VariableCount > 32) ? 32 : VariableCount;
    ReadAllVariableMtrrs (NULL, VariableCount, &Mtrrs[11]);

    // The last slot holds MSR 0x2FF.
    Mtrrs[75] = ReadMsr (0x2FF);
  }

  return Mtrrs;
}

/*==========================================================================
 * Library Constructors
 *==========================================================================*/

/**
  UEFI Boot Services Table Library Constructor.
  (sub_234C at 0x234C)

  Stores the image handle in gImageHandle, the system table in gST, and the
  value found at byte offset 96 of the system table in gBS.  Each of the
  three is asserted to be non-zero after it is stored.

  @param[in]  ImageHandle   Image handle, carried as an integer.
  @param[in]  SystemTable   System table address, carried as an integer.

  @return Always 0.
**/
UINT64
UefiBootServicesTableLibConstructor (
  UINT64  ImageHandle,
  UINT64  SystemTable
  )
{
  UINT64  *BootServicesSlot;

  gImageHandle = ImageHandle;
  if (ImageHandle == 0) {
    DebugAssert (__FILE__, __LINE__, "gImageHandle != ((void *) 0)");
  }

  gST = SystemTable;
  if (SystemTable == 0) {
    DebugAssert (__FILE__, __LINE__, "gST != ((void *) 0)");
  }

  // The boot-services pointer lives at byte offset 96 (0x60) of the table.
  BootServicesSlot = (UINT64 *)(SystemTable + 96);
  gBS = *BootServicesSlot;
  if (!gBS) {
    DebugAssert (__FILE__, __LINE__, "gBS != ((void *) 0)");
  }

  return 0;
}

/**
  UEFI Runtime Services Table Library Constructor.
  (sub_23C8 at 0x23C8)

  Loads gRT from byte offset 88 of the system table and asserts that the
  loaded value is non-zero.

  @param[in]  ImageHandle   Image handle (not used by this routine).
  @param[in]  SystemTable   System table address, carried as an integer.

  @return Always 0.
**/
UINT64
UefiRuntimeServicesTableLibConstructor (
  UINT64  ImageHandle,
  UINT64  SystemTable
  )
{
  UINT64  *RuntimeServicesSlot;

  // The runtime-services pointer lives at byte offset 88 (0x58) of the table.
  RuntimeServicesSlot = (UINT64 *)(SystemTable + 88);
  gRT = *RuntimeServicesSlot;

  if (!gRT) {
    DebugAssert (__FILE__, __LINE__, "gRT != ((void *) 0)");
  }

  return 0;
}

/**
  DXE Services Table Library Constructor.
  (sub_4A00 at 0x4A00)

  Calls EfiGetSystemConfigurationTable() and asserts both that the call
  succeeded and that gDS is non-zero afterwards.

  NOTE(review): as written, the address of gDS is passed in the GUID
  position and the located table is returned in a local that is never
  copied to gDS.  The original binary presumably passed a GUID (the old
  comment mentions unk_8CC0) and received the table into gDS -- confirm
  against the disassembly before relying on gDS being populated here.

  @param[in]  ImageHandle   Image handle (not used by this routine).
  @param[in]  SystemTable   System table address (not used by this routine).

  @return Status returned by EfiGetSystemConfigurationTable().
**/
UINT64
DxeServicesTableLibConstructor (
  UINT64  ImageHandle,
  UINT64  SystemTable
  )
{
  UINT64  LookupStatus;
  UINT64  FoundTable;

  LookupStatus = EfiGetSystemConfigurationTable (&gDS, &FoundTable);
  if (LookupStatus != 0) {
    DebugPrint (0x80000000, "\nASSERT_EFI_ERROR (Status = %r)\n", LookupStatus);
    DebugAssert (__FILE__, __LINE__, "!EFI_ERROR (Status)");
  }

  if (!gDS) {
    DebugAssert (__FILE__, __LINE__, "gDS != ((void *) 0)");
  }

  return LookupStatus;
}

/*==========================================================================
 * Main Driver Initialization (sub_13A0 at 0x13A0, sub_9E0 at 0x9E0)
 *==========================================================================*/

/**
  Core CPU Architecture Protocol initialization.

  Implements:
    1. Save global services (gST/gBS/gRT)
    2. Initialize FPU + SSE
    3. Initialize exception handlers (IDT setup)
    4. Install CPU Architecture Protocol
    5. Program GCD memory attributes
    6. Program CPU features (MSRs)
    7. Write back and invalidate cache
    8. Initialize MTRR sync

  (sub_13A0 at 0x13A0, called from sub_DB0 -> _ModuleEntryPoint)
**/
UINT64
CpuArchDxeMain (
  VOID
  )
{
  UINT64  Status;

  // Save global services (sub_55DC equivalent)
  if (!gST) {
    // Already done by constructor list
  }

  // Initialize FPU + SSE (sub_410)
  InitializeFpuSse ();

  // Initialize exception handlers (sub_12C0 / sub_4A88)
  {
    UINT8   Enabled;
    UINT64  VectorInfo;
    UINT64  ExceptionStatus;

    // Determine if cache is enabled (sub_243C)
    Enabled = (GetRflags () & (1 << 9)) ? 1 : 0;

    // Load GDT (sub_8EC0)
    // Loads the OS-visible GDT descriptor

    // Get vector info from config table (sub_12C0)
    VectorInfo = 0;
    ExceptionStatus = InitExceptionHandlers (VectorInfo);

    // Write back invalidate cache (sub_2460)
    if (Enabled)
      __asm { wbinvd }
  }

  // Install CPU Arch Protocol via gBS->InstallMultipleProtocolInterfaces
  // gBS call at [BootServices + 128]
  Status = ((UINT64 (__fastcall *)(VOID *, VOID *, UINT64, UINT64 *))
            (*(UINT64 *)(gBS + 128)))(&unk_8DE8, &unk_8C90, 0, off_8CD0);

  if (DebugEnabled () && Status) {
    if (DebugEnabled () && AssertEnabled (0x80000000))
      DebugPrint (0x80000000, "\nASSERT_EFI_ERROR (Status = %r)\n", Status);
    DebugAssert (__FILE__, __LINE__, "!EFI_ERROR (Status)");
  }

  // Program GCD memory attributes based on current MTRR settings
  ProgramMemoryAttributes ();

  // Program CPU features (sub_3048)
  ProgramCpuFeatures ();

  // Initialize MTRR sync (sub_1880)
  MtrrSyncInit ();

  // Write back and invalidate cache
  WriteBackInvalidateCache (GetMtrrDefaultType ());

  return 0;
}

/**
  Library-constructor sequence (sub_9E0 at 0x9E0).

  Runs, in order:
    1. UefiBootServicesTableLibConstructor
    2. UefiRuntimeServicesTableLibConstructor
    3. DxeHobLibConstructor
    4. GetPciExpressBase (result cached in gMmPciBase)
    5. ResetColdSystem
    6. DxeServicesTableLibConstructor

  This routine does NOT call CpuArchDxeMain().  ModuleEntryPoint() invokes
  it immediately after this routine returns; calling it here as well would
  run the whole driver initialization twice.

  DxeHobLibConstructor() returns the HOB-list pointer rather than a status,
  and asserts internally when that pointer is zero, so its non-zero return
  value is not treated as an error here.

  @param[in]  ImageHandle   EFI image handle.
  @param[in]  SystemTable   EFI system table.

  @return Always 0; constructor failures are reported through the
          debug-assert path only.
**/
UINT64
DriverMainInit (
  UINT64  ImageHandle,
  UINT64  SystemTable
  )
{
  UINT64 Status;

  // 1. UefiBootServicesTableLibConstructor (sub_234C)
  Status = UefiBootServicesTableLibConstructor (ImageHandle, SystemTable);
  if (DebugEnabled () && Status) {
    DebugPrint (0x80000000, "\nASSERT_EFI_ERROR (Status = %r)\n", Status);
    DebugAssert (__FILE__, __LINE__, "!EFI_ERROR (Status)");
  }

  // 2. UefiRuntimeServicesTableLibConstructor (sub_23C8)
  Status = UefiRuntimeServicesTableLibConstructor (ImageHandle, SystemTable);
  if (DebugEnabled () && Status) {
    DebugPrint (0x80000000, "\nASSERT_EFI_ERROR (Status = %r)\n", Status);
    DebugAssert (__FILE__, __LINE__, "!EFI_ERROR (Status)");
  }

  // 3. DxeHobLibConstructor: the return value is a pointer, not a status,
  //    and a zero pointer is already asserted inside the callee.
  (VOID)DxeHobLibConstructor (ImageHandle, SystemTable);

  // 4. Cache the PCI Express base address (sub_2BF8)
  gMmPciBase = GetPciExpressBase ();

  // 5. sub_2C54; always returns 0, so there is nothing to check.
  (VOID)ResetColdSystem ();

  // 6. DxeServicesTableLibConstructor (sub_4A00)
  Status = DxeServicesTableLibConstructor (ImageHandle, SystemTable);
  if (DebugEnabled () && Status) {
    DebugPrint (0x80000000, "\nASSERT_EFI_ERROR (Status = %r)\n", Status);
    DebugAssert (__FILE__, __LINE__, "!EFI_ERROR (Status)");
  }

  return 0;
}

/*==========================================================================
 * Module Entry Point (at 0x990)
 *==========================================================================*/

/**
  UEFI Driver Module Entry Point.

  Runs DriverMainInit() for the library-constructor sequence, then calls
  CpuArchDxeMain() and returns whatever that call returns.  No destructor
  processing is performed in this reconstruction, even on failure.

  NOTE(review): confirm that DriverMainInit() does not itself invoke
  CpuArchDxeMain(); if it does, the main routine runs twice per load.

  @param[in]  ImageHandle   EFI image handle.
  @param[in]  SystemTable   EFI system table.

  @return Value returned by CpuArchDxeMain().
**/
EFI_STATUS
EFIAPI
ModuleEntryPoint (
  IN EFI_HANDLE        ImageHandle,
  IN EFI_SYSTEM_TABLE *SystemTable
  )
{
  UINT64  MainStatus;

  // Library constructors first; their result is not examined.
  DriverMainInit ((UINT64)ImageHandle, (UINT64)SystemTable);

  // Driver main routine (sub_DB0 -> sub_13A0); its status is the result.
  MainStatus = CpuArchDxeMain ();

  return MainStatus;
}

/*==========================================================================
 * CPU Feature Programming (sub_3048 at 0x3048)
 *==========================================================================*/

/**
  Program CPU feature MSRs during initialization.

  Writes three MSRs:

  1. MSR 0x1D0 (IA32_DEBUGCTL) -- Enable LBR (bit 0-8)
     Value: (original & ~0x1FF) | 0x10F

  2. MSR 0x3A0 (IA32_MISC_ENABLE) -- Enable SSE/FPU features
     Value: (original & ~0x1A700) | 0x700

  3. MSR 0x3A1 -- Enable MONITOR/MWAIT features
     Value: (original & ~0x1A700) | 0x400

  @return EFI_SUCCESS.
**/
UINT64
ProgramCpuFeatures (
  VOID
  )
{
  UINT32  Value;

  // Program MSR 0x1D0 -- IA32_DEBUGCTL
  Value = (UINT32)ReadMsr (240);
  Value = (Value & 0xFFFFFE00) | 0x10F;
  WriteMsr (240, Value);

  // Program MSR 0x3A0 -- IA32_MISC_ENABLE
  Value = (UINT32)ReadMsr (848);
  Value = (Value & 0xFFFE58FF) | 0x700;
  WriteMsr (848, Value);

  // Program MSR 0x3A1
  Value = (UINT32)ReadMsr (864);
  Value = (Value & 0xFFFE58FF) | 0x400;
  WriteMsr (864, Value);

  return 0;
}

/*==========================================================================
 * Cache attribute management (MemoryAttribute.c equivalents)
 *==========================================================================*/

/**
  Map an MTRR memory type to a memory attribute bit.
  (sub_1A60 at 0x1A60)

  @param[in]  CacheType   MTRR memory type:
      0 (UC) -> 1
      1 (WC) -> 2
      4 (WT) -> 4
      5 (WP) -> 0x1000
      6 (WB) -> 8

  @return The attribute listed above, or 0 for any other type value.
**/
UINT64
CacheTypeToMemoryAttribute (
  UINT8  CacheType
  )
{
  // Indexed by memory type; types 2 and 3 have no mapping and yield 0.
  static const UINT64  AttributeByType[7] = {
    1,        // 0: UC
    2,        // 1: WC
    0,        // 2: unmapped
    0,        // 3: unmapped
    4,        // 4: WT
    0x1000,   // 5: WP
    8         // 6: WB
  };

  if (CacheType > 6) {
    return 0;
  }

  return AttributeByType[CacheType];
}

/**
  Find the memory-map records containing the first and last byte of an
  address range.
  (sub_1B70 at 0x1B70)

  The map is an array of 56-byte records; only the first two fields are
  read here: the base address at byte offset 0 and the length at byte
  offset 8.  Record offsets are byte offsets, so the map is walked through
  a byte pointer regardless of the declared type of a1.

  The end of the range is tested with its LAST byte (a3 + a4 - 1), so a
  range that ends exactly at the end of a record resolves to that record.
  A zero-length range is treated as the single address a3.

  @param[in]  a1              Memory-map record array.
  @param[in]  i               Number of records in the array.
  @param[in]  a3              Base address of the range.
  @param[in]  a4              Length of the range in bytes.
  @param[out] p_j             Index of the record containing a3
                              (0 if none was seen before returning).
  @param[out] a6              Index of the record containing the last byte.

  @return 0 when the record containing the last byte was found,
          0x800000000000000E (EFI_NOT_FOUND) otherwise.
**/
UINT64
FindMemorySpaceMapEntry (
  UINT64  *************a1,
  UINT64   i,
  UINT64   a3,
  UINT64   a4,
  UINT64  *p_j,
  UINT64  *a6
  )
{
  const UINT8  *Map;
  const UINT8  *Entry;
  UINT64        EntryBase;
  UINT64        EntryEnd;
  UINT64        LastByte;
  UINT64        j;

  Map      = (const UINT8 *)a1;
  LastByte = (a4 != 0) ? (a3 + a4 - 1) : a3;

  *p_j = 0;
  *a6  = 0;

  for (j = 0; j < i; j++) {
    Entry     = Map + 56 * j;
    EntryBase = *(const UINT64 *)(Entry + 0);
    EntryEnd  = EntryBase + *(const UINT64 *)(Entry + 8);   // exclusive

    if (a3 >= EntryBase && a3 < EntryEnd) {
      *p_j = j;
    }

    // The search stops at the first record that holds the last byte.
    if (LastByte >= EntryBase && LastByte < EntryEnd) {
      *a6 = j;
      return 0;
    }
  }

  return 0x800000000000000EULL;
}

/**
  Apply a cache attribute to an address range, one memory-map record at a
  time.
  (sub_1C80 at 0x1C80)

  FindMemorySpaceMapEntry() supplies the first and last record index for
  the range.  For every record in that span whose 32-bit field at byte
  offset 32 is non-zero, the part of the range that overlaps the record is
  passed to the services-table entry at byte offset 64 of gDS, with

    attributes = (CacheType & record.Capabilities) |
                 (record.Attributes & ~0x1F)

  Records are 56 bytes long (base at +0, length at +8, capabilities at
  +16, attributes at +24); all offsets are byte offsets, so the map is
  walked through a byte pointer.

  @param[in]  a1              Memory-map record array.
  @param[in]  i               Number of records in the array.
  @param[in]  a3              Base address of the range.
  @param[in]  a4              Length of the range in bytes.
  @param[in]  CacheType       Attribute bits to apply.

  @return 0 on success, or the failure value of FindMemorySpaceMapEntry().
          The result of each individual attribute call is not examined.
**/
UINT64
SetMemorySpaceAttributesCpuArch (
  UINT64  *a1,
  UINT64   i,
  UINT64   a3,
  UINT64   a4,
  UINT64   CacheType
  )
{
  const UINT8  *Map;
  const UINT8  *Entry;
  UINT64        j;
  UINT64        p_j;
  UINT64        j_1;
  UINT64        Status;
  UINT64        EntryBase;
  UINT64        EntryEnd;
  UINT64        RangeBase;
  UINT64        RangeLen;
  UINT64        Attributes;

  Status = FindMemorySpaceMapEntry ((UINT64 *************)a1, i, a3, a4, &p_j, &j_1);
  if (Status) {
    return Status;
  }

  Map = (const UINT8 *)a1;

  for (j = p_j; j <= j_1; j++) {
    Entry = Map + 56 * j;

    // Records whose field at offset 32 is zero are skipped.
    if (*(const UINT32 *)(Entry + 32) == 0) {
      continue;
    }

    EntryBase = *(const UINT64 *)(Entry + 0);
    EntryEnd  = EntryBase + *(const UINT64 *)(Entry + 8);   // exclusive

    // Clip the requested range to this record.
    RangeBase = (a3 < EntryBase) ? EntryBase : a3;
    RangeLen  = (a3 + a4 - 1 >= EntryEnd) ?
                (EntryEnd - RangeBase) :
                (a4 + a3 - RangeBase);

    // Keep every existing attribute except the low five bits, and add the
    // requested bits that the record's capabilities allow.
    Attributes = (CacheType & *(const UINT64 *)(Entry + 16)) |
                 (*(const UINT64 *)(Entry + 24) & 0xFFFFFFFFFFFFFFE0ULL);

    ((UINT64 (__fastcall *)(UINT64, UINT64, UINT64))
     (*(UINT64 *)(gDS + 64)))(RangeBase, RangeLen, Attributes);
  }

  return 0;
}

/*==========================================================================
 * Program Memory Attributes (sub_1E40 at 0x1E40)
 *==========================================================================*/

/**
  Program GCD memory space attributes based on current MTRR settings.

  Reads the GCD memory space map and programs each entry with the
  appropriate cache type attribute.  Merges adjacent ranges with
  the same cache type for efficient GCD programming.

  (sub_1E40 at 0x1E40, ~0x50C bytes)
**/
UINT64
ProgramMemoryAttributes (
  VOID
  )
{
  UINT64  Descriptor;
  UINT64  DescriptorCount;
  UINT64  Status;
  UINT64  i, VarCount;
  UINT8   CacheType;
  UINT64  CapsMask;
  UINT64  MtrrTable[129];
  UINT64  FixedMtrrTable[11];
  UINT64  CombinedBase;
  UINT64  CombinedLength;
  UINT64  PrevAttr;
  UINT64  Index;
  UINT64  k, jj;

  // MP sync: sub_568C (1, 69639, 0, 0, 0)
  VarCount = GetEffectiveVariableMtrrCount ();

  gMtrrSyncLock = 1;

  // Get max physical address
  GetMaxPhysicalAddress ();

  // Get GCD memory space map via gDS
  Descriptor = 0;
  Status = ((UINT64 (__fastcall *)(UINT64 *, UINT64 *))
            (*(UINT64 *)(gDS + 72)))(&DescriptorCount, &Descriptor);

  if (DebugEnabled () && Status) {
    DebugPrint (0x80000000, "\nASSERT_EFI_ERROR (Status = %r)\n", Status);
    DebugAssert (__FILE__, __LINE__, "!EFI_ERROR (Status)");
  }

  CapsMask = GetMtrrDefaultType ();
  CacheType = CapsMask;

  // Read all MTRRs
  MtrrGetAllMtrrs (MtrrTable);

  // Process GCD entries:
  // 1. UC entries (where status == 0 and type == 0/WC)
  for (i = 0; i < DescriptorCount; i++) {
    if (*(UINT32 *)(Descriptor + 56 * i + 32)) {
      UINT64 Attr = CacheTypeToMemoryAttribute (CacheType);
      UINT64 Caps = *(UINT64 *)(Descriptor + 56 * i + 16);
      UINT64 Atrs = *(UINT64 *)(Descriptor + 56 * i + 24);

      ((UINT64 (__fastcall *)(UINT64, UINT64, UINT64))
       (*(UINT64 *)(gDS + 64)))(
        *(UINT64 *)(Descriptor + 56 * i),
        *(UINT64 *)(Descriptor + 56 * i + 8),
        Attr & Caps | Atrs & 0xFFFFFFFFFFFFFFE0ULL);
    }
  }

  // Process WB type entries (MTRR type 6)
  for (k = 0; k < VarCount; k++) {
    // sub_1C80 for each WB region
  }

  // Merge adjacent ranges with same cache type in fixed MTRR region
  Index         = 0;
  CombinedBase  = 0;
  CombinedLength = 0;
  PrevAttr       = 0;

  // Iterate through fixed MTRR ranges (dword_8D20 table: 11 entries x 3 DWORDs)
  for (i = 0; i < 0xB; i++) {
    for (k = 0; k < 8; k++) {
      // For each MTRR range within the fixed MTRR

      if (CombinedLength) {
        // If attribute changed, program the previous merged range
        if (jj != PrevAttr) {
          SetMemorySpaceAttributesCpuArch (
            Descriptor, DescriptorCount,
            CombinedBase, CombinedLength, PrevAttr);
          CombinedBase = k * 0x1000;
          PrevAttr = jj;
        }
      } else {
        PrevAttr = jj;
      }
    }
  }

  // Flush the final merged range
  SetMemorySpaceAttributesCpuArch (
    Descriptor, DescriptorCount,
    CombinedBase, CombinedLength, PrevAttr);

  if (Descriptor)
    FreePool (Descriptor);

  gMtrrSyncLock = 0;
  return Status;
}

/*==========================================================================
 * Exception Handlers (sub_4A88 at 0x4A88, sub_4D3C at 0x4D3C)
 *==========================================================================*/

/**
  Initialize CPU exception handlers.
  (sub_4A88 at 0x4A88, from CpuExceptionHandlerLib DxeException.c)

  Sets up:
    1. Reserved vector table (22528 bytes = 256 * 88-byte entries)
    2. External interrupt handler table (2048 bytes)
    3. IDT table (4096 bytes = 256 * 16-byte entries)
    4. Interrupt entry stubs (stub_header_size * 256 bytes)
    5. A new IDTR covering min(current vector count, 256) entries

  Each VectorInfo record is 24 bytes: a 32-bit vector number followed by a
  32-bit attribute.  The list ends at the record whose attribute is
  0x80000000.  Attributes above 2 are rejected; attributes of vectors
  below 0x100 are stored at byte offset 12 of the matching 88-byte entry.

  Any failed allocation is asserted, everything allocated so far is
  released, and EFI_OUT_OF_RESOURCES is returned; the globals and the IDTR
  are only updated once every allocation has succeeded.

  NOTE(review): the stub copy uses the first word written by
  GetExceptionTemplateInfo() both as the stub size and as the copy source
  address, exactly as the reconstruction did.  With the values that helper
  currently writes this cannot be a real template address -- confirm the
  template-map layout against the disassembly.

  @param[in]  VectorInfo  Optional vector info list, or NULL.

  @return 0 on success,
          0x8000000000000002 (EFI_INVALID_PARAMETER) on a bad attribute,
          0x8000000000000009 (EFI_OUT_OF_RESOURCES) on allocation failure.
**/
UINT64
InitExceptionHandlers (
  UINT64  *VectorInfo
  )
{
  UINT64  ReservedVectors;
  UINT64  HandlerTable;
  UINT64  IdtLimit;
  UINT64  MaxVectors;
  UINT64  IdtTable;
  UINT64  TemplateInfo[3];
  UINT64  StubHeaderSize;
  UINT64  InterruptEntryCode;
  UINT64  i;
  UINT16  LimitVal;
  UINT64  BaseVal;

  HandlerTable = 0;
  IdtTable     = 0;

  // Allocate 256 * 88 = 22528 bytes for the reserved vector table
  ReservedVectors = AllocatePool (0, 22528);
  if (!ReservedVectors) {
    DebugAssert (__FILE__, __LINE__, "ReservedVectors != ((void *) 0)");
    return 0x8000000000000009ULL;  // EFI_OUT_OF_RESOURCES
  }

  SetMem32 ((VOID *)ReservedVectors, 22528, 0);

  // Process the optional VectorInfo list
  if (VectorInfo) {
    // InfoEntry points at the attribute word; the vector number is the
    // word just before it.
    UINT32 *InfoEntry = (UINT32 *)VectorInfo + 1;
    while (*InfoEntry != 0x80000000) {
      if (*InfoEntry > 2) {
        FreePool (ReservedVectors);
        return 0x8000000000000002ULL;  // EFI_INVALID_PARAMETER
      }
      if (*(InfoEntry - 1) < 0x100) {
        *(UINT32 *)(ReservedVectors + 88 * *(InfoEntry - 1) + 12) = *InfoEntry;
      }
      InfoEntry += 6;   // 6 * 4 = 24 bytes per record
    }
  }

  // Allocate the external interrupt handler table (2048 bytes = 256 * 8)
  HandlerTable = AllocatePool (0, 2048);
  if (!HandlerTable) {
    DebugAssert (__FILE__, __LINE__, "ExternalInterruptHandler != ((void *) 0)");
    goto OutOfResources;
  }

  ZeroMem ((VOID *)HandlerTable, 2048);

  // Derive the vector count from the current IDT limit (16 bytes per gate)
  ReadIdtr (&LimitVal, &BaseVal);
  IdtLimit   = LimitVal;
  MaxVectors = ((UINT32)IdtLimit + 1) >> 4;
  if (MaxVectors > 256) {
    MaxVectors = 256;
  }

  // Allocate the IDT table (4096 bytes)
  IdtTable = AllocatePool (0, 4096);
  if (!IdtTable) {
    DebugAssert (__FILE__, __LINE__, "IdtTable != ((void *) 0)");
    goto OutOfResources;
  }

  ZeroMem ((VOID *)IdtTable, 4096);

  // GetExceptionTemplateInfo() writes three UINT64 words, so it must be
  // given a three-word buffer.
  ZeroMem ((VOID *)TemplateInfo, sizeof (TemplateInfo));
  GetExceptionTemplateInfo (TemplateInfo);
  StubHeaderSize = TemplateInfo[0];
  if (StubHeaderSize > 16) {
    DebugAssert (__FILE__, __LINE__, "TemplateMap.ExceptionStubHeaderSize <= 16");
  }

  // Allocate one stub per possible vector (256 * StubHeaderSize bytes)
  InterruptEntryCode = AllocatePool (0, StubHeaderSize << 8);
  if (!InterruptEntryCode) {
    DebugAssert (__FILE__, __LINE__, "InterruptEntryCode != ((void *) 0)");
    goto OutOfResources;
  }

  // Copy the stub for each vector and patch in its vector number
  for (i = 0; i < MaxVectors; i++) {
    CopyMem (
      (VOID *)(InterruptEntryCode + i * StubHeaderSize),
      (VOID *)TemplateInfo[0],
      StubHeaderSize
      );
    SetIdtEntry (IdtTable, (UINT8)i, InterruptEntryCode + i * StubHeaderSize);
  }

  // Publish the tables only after every allocation has succeeded
  gExternalIntHandler = HandlerTable;
  g256                = 256;
  gReservedVectors    = ReservedVectors;

  // Fill in the IDT gates
  SetupIdtTable (IdtTable, &InterruptEntryCode, &MaxVectors);

  // Load the new IDTR
  LimitVal = (UINT16)(MaxVectors * 16 - 1);
  BaseVal  = IdtTable;
  WriteIdtr (LimitVal, BaseVal);

  return 0;

OutOfResources:
  if (IdtTable) {
    FreePool (IdtTable);
  }
  if (HandlerTable) {
    FreePool (HandlerTable);
  }
  FreePool (ReservedVectors);
  return 0x8000000000000009ULL;  // EFI_OUT_OF_RESOURCES
}

/**
  Patch the vector number into an exception entry stub.
  (sub_858 at 0x858)

  Writes Vector to the byte at offset 2 of the stub.  The IDT table
  argument is accepted but not used.

  @param[in]  IdtTable     IDT table address (unused).
  @param[in]  Vector       Vector number (0-255).
  @param[in]  EntryCode    Address of the stub to patch.
**/
VOID
SetIdtEntry (
  UINT64    IdtTable,
  UINT8     Vector,
  UINT64    EntryCode
  )
{
  UINT8  *Stub;

  // Byte 2 of the stub carries the vector number.
  Stub    = (UINT8 *)EntryCode;
  Stub[2] = Vector;
}

/**
  Fill in the exception stub template information.
  (sub_834 at 0x834)

  Writes three consecutive UINT64 words:
    word 0 = 0x10   (stub header size, 16 bytes)
    word 1 = 15
    word 2 = 0x430  (CommonExceptionHandler entry)

  @param[out] a1   Buffer of at least three UINT64 words.
**/
VOID
GetExceptionTemplateInfo (
  UINT64 *a1
  )
{
  UINT64  *Word;

  Word = a1;

  *Word++ = 0x10;    // stub header size = 16 bytes
  *Word++ = 15;
  *Word   = 0x430;   // exception handler entry (CommonExceptionHandler)
}

/**
  Fill in the IDT gate descriptors (sub_4FA4 at 0x4FA4).

  For each vector i below *MaxVectors, the 16-byte gate at
  IdtTable + 16 * i is pointed at the i-th entry stub, whose address is
  *EntryCode + i * 0x10.  The handler address is computed from the stub
  array, not read back from the table, so the result does not depend on
  the table's previous contents.

  Gate layout written here:
    +0   handler address bits 15:0
    +2   code segment selector (current CS)
    +4   0x8E00
    +6   handler address bits 31:16
    +8   handler address bits 63:32
    +12  0

  NOTE(review): the stub size is fixed at 0x10 here; confirm that it always
  matches the stub size used when the stubs were copied.

  @param[in]  IdtTable   IDT table address.
  @param[in]  EntryCode  Pointer to the address of the first entry stub.
  @param[in]  MaxVectors Pointer to the number of vectors to set up.
**/
VOID
SetupIdtTable (
  UINT64    IdtTable,
  UINT64   *EntryCode,
  UINT64   *MaxVectors
  )
{
  UINT16  CodeSegment;
  UINT64  i;
  UINT64  Entry;
  UINT64  Gate;
  UINT64  StubSize;

  // Use the current code segment selector for every gate
  __asm {
    mov     ax, cs
    mov     CodeSegment, ax
  }

  StubSize = 0x10;  // default stub size

  for (i = 0; i < *MaxVectors; i++) {
    Entry = *EntryCode + i * StubSize;
    Gate  = IdtTable + 16 * i;

    *(UINT16 *)(Gate + 0)  = (UINT16)(Entry & 0xFFFF);
    *(UINT16 *)(Gate + 2)  = CodeSegment;
    *(UINT16 *)(Gate + 4)  = 0x8E00;
    *(UINT16 *)(Gate + 6)  = (UINT16)((Entry >> 16) & 0xFFFF);
    *(UINT32 *)(Gate + 8)  = (UINT32)(Entry >> 32);
    *(UINT32 *)(Gate + 12) = 0;
  }
}

/**
  Initialize external interrupt handler table notification.
  (sub_6A70 equivalent)
**/
VOID
InitializeExternalInterruptHandlerTable (
  VOID
  )
{
  // Initialize spin lock for exception handler
  AcquireSpinLock (&gSpinLock);
  ReleaseSpinLock (&gSpinLock);
}

/*==========================================================================
 * Spin Lock operations (sub_6A70, sub_6AA4, sub_6B20)
 *==========================================================================*/

/**
  Acquire spin lock (set to 2, blocking).
  (sub_6A70 at 0x6A70)

  Sets spin lock to initial value 1 (released state, acquire intent).

  @param[in]  Lock   Pointer to spin lock variable.
**/
VOID
AcquireSpinLock (
  volatile UINT64 *Lock
  )
{
  if (!Lock)
    DebugAssert (__FILE__, __LINE__, "SpinLock != ((void *) 0)");

  *Lock = 1;
}

/**
  Try to acquire a spin lock with a single atomic compare-exchange.
  (sub_6AA4 at 0x6AA4)

  The lock word is changed from 1 to 2 only if it currently holds 1.
  The lock is asserted to hold 1 or 2 on entry.

  @param[in]  Lock   Pointer to spin lock variable.

  @return 1 if the lock word was changed from 1 to 2, 0 if it was not.
**/
UINT8
TryAcquireSpinLock (
  volatile UINT64 *Lock
  )
{
  UINT64  LockValue;

  if (!Lock)
    DebugAssert (__FILE__, __LINE__, "SpinLock != ((void *) 0)");

  LockValue = *Lock;
  if (LockValue != 2 && LockValue != 1)
    DebugAssert (__FILE__, __LINE__,
      "LockValue == ((UINTN) 2) || LockValue == ((UINTN) 1)");

  // InterlockedCompareExchange64 (Lock, 2, 1) == 1
  // CMPXCHG compares the memory operand with RAX, so the expected value
  // (1) must be in RAX before the instruction executes.  ZF is set when
  // the exchange happened; it is widened to a clean 0/1 before storing.
  __asm {
    mov     rcx, Lock
    mov     rax, 1
    mov     rdx, 2
    lock cmpxchg [rcx], rdx
    setz    al
    movzx   eax, al
    mov     LockValue, rax
  }

  return (UINT8)LockValue;
}

/**
  Release spin lock (set back to 1).
  (sub_6B20 at 0x6B20)

  @param[in]  Lock   Pointer to spin lock variable.
**/
VOID
ReleaseSpinLock (
  volatile UINT64 *Lock
  )
{
  UINT64 LockValue;

  if (!Lock)
    DebugAssert (__FILE__, __LINE__, "SpinLock != ((void *) 0)");

  LockValue = *Lock;
  if (LockValue != 2 && LockValue != 1)
    DebugAssert (__FILE__, __LINE__,
      "LockValue == ((UINTN) 2) || LockValue == ((UINTN) 1)");

  *Lock = 1;
}

/*==========================================================================
 * MTRR Sync Init (sub_1880 at 0x1880, MtrrSync.c)
 *==========================================================================*/

/**
  Initialize MTRR synchronization support.

  Stores the result of AllocateZeroedPages (0x260) in gMtrrBuffer, then
  hands token 155 and that buffer to RegisterMtrrNotify().  A non-zero
  result from that call is reported through the debug-assert path only.

  NOTE(review): 0x260 is 608; whether AllocateZeroedPages() counts bytes or
  pages is not visible here -- confirm the intended allocation size.

  @return Always 0.
**/
UINT64
MtrrSyncInit (
  VOID
  )
{
  UINT64  NotifyStatus;

  gMtrrBuffer = AllocateZeroedPages (0x260);

  // sub_29F8
  NotifyStatus = RegisterMtrrNotify (155, gMtrrBuffer);

  if (DebugEnabled () && NotifyStatus) {
    DebugPrint (0x80000000, "\nASSERT_EFI_ERROR (Status = %r)\n", NotifyStatus);
    DebugAssert (__FILE__, __LINE__, "!EFI_ERROR (Status)");
  }

  return 0;
}

/**
  Pass the MTRR buffer to the interface returned by GetPcdProtocol().
  (sub_29F8 at 0x29F8)

  Calls the function pointer stored at byte offset 144 of that interface
  with (n155, gMtrrBuffer).  The global gMtrrBuffer is used for the second
  argument; the SystemTable parameter is only forwarded, by address, to
  GetPcdProtocol().

  NOTE(review): byte offset 144 of a PCD protocol instance is presumably a
  64-bit "set" member rather than a notify registration -- confirm.

  @param[in]  n155         Token passed through to the interface call.
  @param[in]  SystemTable  MTRR buffer pointer supplied by the caller.

  @return Value returned by the interface call.
**/
UINT64
RegisterMtrrNotify (
  UINT64  n155,
  UINT64  SystemTable
  )
{
  typedef UINT64 (__fastcall *INTERFACE_FN)(UINT64, UINT64);

  UINT64        Interface;
  INTERFACE_FN  Member;

  Interface = GetPcdProtocol (n155, (EFI_SYSTEM_TABLE *)&SystemTable);

  // Member function pointer at byte offset 144 of the interface.
  Member = (INTERFACE_FN)(*(UINT64 *)(Interface + 144));

  return Member (n155, gMtrrBuffer);
}

/**
  Return the cached PCD protocol instance, locating it on first use.
  (sub_296C at 0x296C)

  If gPcdDb is already non-zero it is returned directly.  Otherwise the
  boot-services entry at byte offset 320 is called with
  (&unk_8C80, 0, &gPcdDb); a failing status and a still-zero gPcdDb are
  both asserted.  Neither parameter is used.

  @param[in]  ImageHandle   Unused.
  @param[in]  SystemTable   Unused.

  @return Value of gPcdDb after the lookup (0 if it could not be located).
**/
UINT64
GetPcdProtocol (
  UINT64              ImageHandle,
  EFI_SYSTEM_TABLE   *SystemTable
  )
{
  UINT64  LocateStatus;

  // Fast path: already located on an earlier call.
  if (gPcdDb) {
    return gPcdDb;
  }

  LocateStatus = ((UINT64 (__fastcall *)(VOID *, UINT64, UINT64 *))
                  (*(UINT64 *)(gBS + 320)))(&unk_8C80, 0, &gPcdDb);

  if (LocateStatus) {
    DebugPrint (0x80000000, "\nASSERT_EFI_ERROR (Status = %r)\n", LocateStatus);
    DebugAssert (__FILE__, __LINE__, "!EFI_ERROR (Status)");
  }

  if (!gPcdDb) {
    DebugAssert (__FILE__, __LINE__, "mPcd != ((void *) 0)");
  }

  return gPcdDb;
}

/*==========================================================================
 * PEI HOB List (sub_2B64 at 0x2B64, DxeHobLib.c)
 *==========================================================================*/

/**
  Return the HOB list pointer, looking it up on first use.
  (sub_2B64 at 0x2B64)

  If gHobList is already non-zero it is returned directly.  Otherwise
  EfiGetSystemConfigurationTable() is called, and a failing status and a
  still-zero gHobList are both asserted.

  The return value is the HOB list pointer, NOT a status code: a non-zero
  return means success.

  NOTE(review): the address of gHobList is passed both as the GUID and as
  the output slot.  The original binary presumably passed a separate GUID
  in the first position -- confirm against the disassembly.

  @param[in]  ImageHandle  Image handle (unused).
  @param[in]  SystemTable  System Table (unused).

  @return gHobList.
**/
UINT64
DxeHobLibConstructor (
  UINT64  ImageHandle,
  UINT64  SystemTable
  )
{
  UINT64  LookupStatus;

  // Already resolved on an earlier call.
  if (gHobList) {
    return gHobList;
  }

  LookupStatus = EfiGetSystemConfigurationTable (&gHobList, &gHobList);
  if (LookupStatus) {
    DebugPrint (0x80000000, "\nASSERT_EFI_ERROR (Status = %r)\n", LookupStatus);
    DebugAssert (__FILE__, __LINE__, "!EFI_ERROR (Status)");
  }

  if (!gHobList) {
    DebugAssert (__FILE__, __LINE__, "mHobList != ((void *) 0)");
  }

  return gHobList;
}

/*==========================================================================
 * PCI Express Init (sub_2BF8 at 0x2BF8, sub_2C18 at 0x2C18)
 *==========================================================================*/

/**
  Initialize PCI Express MMIO base address.
  (sub_2BF8 at 0x2BF8)

  Reads the PCD for PciExpressBaseAddress.

  @return Pointer to PCD where base is stored.
**/
UINT64
GetPciExpressBase (
  VOID
  )
{
  UINT64  PcdProtocol;

  PcdProtocol = GetPcdProtocol (5, NULL);

  // PCD GetPtr at offset 32 in vtable
  gMmPciBase = ((UINT64 (__fastcall *)(UINT64))
                (*(UINT64 *)(PcdProtocol + 32)))(5);

  return gMmPciBase;
}

/**
  Translate a PCI Express config address into an MMIO address.
  (sub_2C18 at 0x2C18)

  Asserts that no bit above bit 27 is set, then adds gMmPciBase.

  @param[in]  Address  PCIe config space address (bits 27:0).

  @return gMmPciBase + Address.
**/
UINT64
PciExpressLibAddress (
  UINT64  Address
  )
{
  // Anything at or above bit 28 is outside the 256 MB config window.
  if ((Address >> 28) != 0) {
    DebugAssert (__FILE__, __LINE__, "((Address) & ~0xfffffff) == 0");
  }

  return gMmPciBase + Address;
}

/*==========================================================================
 * Reset + CMOS (sub_2C54 at 0x2C54)
 *==========================================================================*/

/**
  Enable a timer block through PCI Express config space if it is not
  already enabled, then busy-wait for 357 ticks of the 24-bit counter at
  I/O port 0x508.
  (sub_2C54 at 0x2C54)

  Config accesses go through PciExpressLibAddress(), which yields an MMIO
  address, so the registers are read and written as memory:
    - 0xFA044 (1024068): 8-bit register; bit 7 is tested and, if clear,
      set after the base below has been written.
    - 0xFA040 (1024064): 16-bit register; written with 0x500 (1280).
  Under standard ECAM decoding both offsets belong to bus 0, device 31,
  function 2 (registers 0x44 and 0x40).

  The wait runs with interrupts disabled; afterwards the interrupt flag is
  put back to the state sampled on entry.

  NOTE(review): despite the function name, nothing here resets the system.
  The sequence looks like ACPI PM timer enabling followed by a roughly
  100 us calibration delay (357 ticks at 3.579545 MHz) -- confirm and
  consider renaming at the call sites.

  @return Always 0.
**/
UINT64
ResetColdSystem (
  VOID
  )
{
  UINT16  Rflags;
  UINT8   WasEnabled;
  UINT32  StartTicks;
  UINT32  CurrentTicks;
  UINT32  Elapsed;

  // Test bit 7 of the REGISTER at 0xFA044 (not of the address itself).
  if ((*(volatile UINT8 *)PciExpressLibAddress (1024068) & 0x80) == 0) {
    // Program the 16-bit base register, then set the enable bit.
    *(volatile UINT16 *)PciExpressLibAddress (1024064) = 1280;
    *(volatile UINT8 *)PciExpressLibAddress (1024068) |= 0x80;
  }

  // Remember whether interrupts were enabled (RFLAGS bit 9), then
  // disable them for the duration of the wait.
  Rflags     = (UINT16)GetRflags ();
  WasEnabled = (Rflags & 0x200) != 0;
  DisableInterrupts ();

  // Starting value of the 24-bit counter at I/O port 0x508 (1288)
  CurrentTicks = IoRead32 (1288);
  StartTicks   = (UINT32)BitFieldRead32 (CurrentTicks, 0, 23);

  // TSC is sampled here; the value is not used by this reconstruction.
  ReadTsc ();

  // Spin until (start + 357 - now) goes negative in 24-bit arithmetic,
  // i.e. until bit 23 of the difference becomes set.
  while (1) {
    CurrentTicks = IoRead32 (1288);
    Elapsed = (StartTicks + 357 - (UINT32)BitFieldRead32 (CurrentTicks, 0, 23)) & 0x800000;
    if (Elapsed)
      break;
    CpuPause ();
  }

  // Restore the interrupt state sampled on entry
  if (WasEnabled)
    EnableInterrupts ();
  else
    DisableInterrupts ();

  return 0;
}

/*==========================================================================
 * WBINVD / WriteBackInvalidateCache (sub_243C, sub_2460 at 0x2460)
 *==========================================================================*/

/**
  Read MTRR default type from MSR 0x2FF (IA32_MTRR_DEF_TYPE), bits 7:0.
  (sub_243C at 0x243C)

  Also checks if cache was enabled via RFLAGS.IF before disabling.

  @return MTRR default type (0=UC, 6=WB, etc.)
**/
UINT8
MtrrReadDefaultType (
  VOID
  )
{
  UINT16  Rflags;
  UINT8   WasEnabled;

  Rflags      = (UINT16)GetRflags ();
  WasEnabled  = (Rflags & 0x200) != 0;

  DisableInterrupts ();
  return WasEnabled;
}

/**
  Set the interrupt state from the argument, then execute WBINVD.
  (sub_2460 at 0x2460)

  @param[in]  CacheType  Zero: interrupts are disabled before WBINVD.
                         Non-zero: interrupts are enabled before WBINVD.
**/
VOID
WriteBackInvalidateCache (
  UINT8  CacheType
  )
{
  if (CacheType == 0) {
    DisableInterrupts ();
  } else {
    EnableInterrupts ();
  }

  // WBINVD runs in both cases.
  __asm { wbinvd }
}

/*==========================================================================
 * APIC Helpers (sub_2E3C, sub_2E78, sub_2ECC, sub_2F30, sub_2F8C)
 *==========================================================================*/

/**
  Detect if system is using x2APIC mode.
  (sub_2DF8 at 0x2DF8)

  @return 1 if x2APIC is enabled, 0 if xAPIC.
**/
UINT8
IsX2Apic (
  VOID
  )
{
  UINT64  ApicBaseMsr;

  ApicBaseMsr = ReadMsr (0x1B);
  return (ApicBaseMsr & (1ULL << 10)) != 0;
}

/**
  Read APIC base address.

  When IsX2Apic() reports 0 the fixed address 0xFEE00000 is returned.
  Otherwise MSR 0x1B is read and its low 12 bits are cleared.

  @return APIC base MMIO address.
**/
UINT64
GetApicBase (
  VOID
  )
{
  if (IsX2Apic ()) {
    // Keep bits 63:12 of the MSR value; drop the flag bits 11:0.
    return ReadMsr (0x1B) & 0xFFFFFFFFFFFFF000ULL;
  }

  return 0xFEE00000ULL;
}

/**
  Read a local APIC register.
  (sub_2E78 at 0x2E78, BaseXApicX2ApicLib.c)

  The offset is asserted to be 16-byte aligned.  When GetApicMode()
  returns 1 the register is read as a 32-bit MMIO location at
  GetApicBase() + MmioOffset; otherwise MSR 0x800 + (MmioOffset >> 4) is
  read.

  @param[in]  MmioOffset  APIC register offset from base.

  @return Register value (32-bit for MMIO, full MSR value otherwise).
**/
UINT64
ReadLocalApicReg (
  UINT64  MmioOffset
  )
{
  if ((MmioOffset & 0xF) != 0) {
    DebugAssert (__FILE__, __LINE__, "(MmioOffset & 0xf) == 0");
  }

  if (GetApicMode () != 1) {
    // x2APIC mode: each 16-byte register maps to one MSR from 0x800 up.
    return ReadMsr ((UINT32)(MmioOffset >> 4) + 0x800);
  }

  // xAPIC mode: memory-mapped access.
  return *(volatile UINT32 *)(GetApicBase () + MmioOffset);
}

/**
  Write a local APIC register.
  (sub_2ECC at 0x2ECC)

  The offset is asserted to be 16-byte aligned.  When GetApicMode()
  returns 1 the value is stored to the 32-bit MMIO location at
  GetApicBase() + MmioOffset; otherwise it is written to
  MSR 0x800 + (MmioOffset >> 4).

  @param[in]  MmioOffset  APIC register offset from base.
  @param[in]  Value       32-bit value to write.

  @return 0 after an MMIO write, Value after an MSR write.
**/
UINT64
WriteLocalApicReg (
  UINT64  MmioOffset,
  UINT32  Value
  )
{
  if ((MmioOffset & 0xF) != 0) {
    DebugAssert (__FILE__, __LINE__, "(MmioOffset & 0xf) == 0");
  }

  if (GetApicMode () != 1) {
    // x2APIC mode: MSR access.
    WriteMsr ((UINT32)(MmioOffset >> 4) + 0x800, Value);
    return Value;
  }

  // xAPIC mode: memory-mapped access.
  *(volatile UINT32 *)(GetApicBase () + MmioOffset) = Value;
  return 0;
}

/**
  Get APIC mode (1 = xAPIC, 2 = x2APIC).
  (sub_2F30 at 0x2F30)

  Returns 1 when IsX2Apic() reports 0.  Otherwise MSR 0x1B is read, bit 11
  is asserted to be set, and the result is 2 if bit 10 is set, 1 if not.

  @return APIC mode (1 = xAPIC, 2 = x2APIC).
**/
UINT64
GetApicMode (
  VOID
  )
{
  UINT64  ApicBaseMsr;

  if (!IsX2Apic ()) {
    return 1;   // xAPIC
  }

  ApicBaseMsr = ReadMsr (0x1B);

  // Bit 11 (EN) is expected to be set.
  if ((ApicBaseMsr & (1ULL << 11)) == 0) {
    DebugAssert (__FILE__, __LINE__, "ApicBaseMsr.Bits.EN != 0");
  }

  // Bit 10 (EXTD) selects x2APIC.
  return (ApicBaseMsr & (1ULL << 10)) ? 2 : 1;
}

/**
  Get local APIC ID.
  (sub_2F8C at 0x2F8C)

  When GetApicMode() returns 1 the ID is bits 31:24 of EBX from CPUID
  leaf 1; otherwise it is the low 32 bits of MSR 0x802.

  @return Local APIC ID.
**/
UINT32
GetApicId (
  VOID
  )
{
  UINT32  Ebx;

  if (GetApicMode () != 1) {
    // x2APIC mode: MSR 0x802 (x2APIC ID)
    return (UINT32)ReadMsr (0x802);
  }

  // xAPIC mode: CPUID.01H:EBX[31:24]
  Cpuid (1, NULL, &Ebx, NULL, NULL);
  return Ebx >> 24;
}

/*==========================================================================
 * Global variable storage (from .data section)
 *==========================================================================*/

/// g256 (n256): Number of IDT entries to set up.  Defined as 256 here;
/// InitExceptionHandlers stores 256 into it again.
volatile INT32     g256 = 256;

/// gFixedMtrrTable: Placeholder for the fixed MTRR MSR table.  The 33
/// UINT32 slots are 11 records of three words each; MtrrGetAllMtrrs reads
/// the first word of each record as an MSR index.  Every slot is zero in
/// this reconstruction, so no real MSR indices are present.
volatile UINT32    gFixedMtrrTable[33] = { 0 };

/// gVariableMtrrCountOffset: Described as the difference between the
/// MTRRCAP VCNT field and the number of available variable MTRRs.
/// NOTE(review): no use of this variable is visible nearby -- confirm.
volatile UINT32    gVariableMtrrCountOffset = 0;  // dword_8DE0