/** @file Intel Processor Trace feature. Copyright (c) 2017 - 2023, Intel Corporation. All rights reserved.
SPDX-License-Identifier: BSD-2-Clause-Patent **/ #include "CpuCommonFeatures.h" /// /// This macro define the max entries in the Topa table. /// Each entry in the table contains some attribute bits, a pointer to an output region, and the size of the region. /// The last entry in the table may hold a pointer to the next table. This pointer can either point to the top of the /// current table (for circular array) or to the base of another table. /// At least 2 entries are needed because the list of entries must /// be terminated by an entry with the END bit set to 1, so 2 /// entries are required to use a single valid entry. /// #define MAX_TOPA_ENTRY_COUNT 2 /// /// Processor trace output scheme selection. /// typedef enum { RtitOutputSchemeSingleRange = 0, RtitOutputSchemeToPA } RTIT_OUTPUT_SCHEME; typedef struct { BOOLEAN TopaSupported; BOOLEAN SingleRangeSupported; MSR_IA32_RTIT_CTL_REGISTER RtitCtrl; MSR_IA32_RTIT_OUTPUT_BASE_REGISTER RtitOutputBase; MSR_IA32_RTIT_OUTPUT_MASK_PTRS_REGISTER RtitOutputMaskPtrs; BOOLEAN CycPacketSupported; } PROC_TRACE_PROCESSOR_DATA; typedef struct { UINT32 NumberOfProcessors; UINT8 ProcTraceOutputScheme; UINT32 ProcTraceMemSize; UINTN *ThreadMemRegionTable; UINTN AllocatedThreads; UINTN *TopaMemArray; BOOLEAN EnableOnBspOnly; BOOLEAN EnablePerformanceCollecting; PROC_TRACE_PROCESSOR_DATA *ProcessorData; } PROC_TRACE_DATA; typedef struct { RTIT_TOPA_TABLE_ENTRY TopaEntry[MAX_TOPA_ENTRY_COUNT]; } PROC_TRACE_TOPA_TABLE; /** Prepares for the data used by CPU feature detection and initialization. @param[in] NumberOfProcessors The number of CPUs in the platform. @return Pointer to a buffer of CPU related configuration data. @note This service could be called by BSP only. **/ VOID * EFIAPI ProcTraceGetConfigData ( IN UINTN NumberOfProcessors ) { PROC_TRACE_DATA *ConfigData; ConfigData = AllocateZeroPool (sizeof (PROC_TRACE_DATA) + sizeof (PROC_TRACE_PROCESSOR_DATA) * NumberOfProcessors); ASSERT (ConfigData != NULL); ConfigData->ProcessorData = (PROC_TRACE_PROCESSOR_DATA *)((UINT8 *)ConfigData + sizeof (PROC_TRACE_DATA)); ConfigData->NumberOfProcessors = (UINT32)NumberOfProcessors; ConfigData->ProcTraceMemSize = PcdGet32 (PcdCpuProcTraceMemSize); ConfigData->ProcTraceOutputScheme = PcdGet8 (PcdCpuProcTraceOutputScheme); ConfigData->EnableOnBspOnly = PcdGetBool (PcdCpuProcTraceBspOnly); ConfigData->EnablePerformanceCollecting = PcdGetBool (PcdCpuProcTracePerformanceCollecting); return ConfigData; } /** Detects if Intel Processor Trace feature supported on current processor. @param[in] ProcessorNumber The index of the CPU executing this function. @param[in] CpuInfo A pointer to the REGISTER_CPU_FEATURE_INFORMATION structure for the CPU executing this function. @param[in] ConfigData A pointer to the configuration buffer returned by CPU_FEATURE_GET_CONFIG_DATA. NULL if CPU_FEATURE_GET_CONFIG_DATA was not provided in RegisterCpuFeature(). @retval TRUE Processor Trace feature is supported. @retval FALSE Processor Trace feature is not supported. @note This service could be called by BSP/APs. **/ BOOLEAN EFIAPI ProcTraceSupport ( IN UINTN ProcessorNumber, IN REGISTER_CPU_FEATURE_INFORMATION *CpuInfo, IN VOID *ConfigData OPTIONAL ) { PROC_TRACE_DATA *ProcTraceData; CPUID_STRUCTURED_EXTENDED_FEATURE_FLAGS_EBX Ebx; CPUID_INTEL_PROCESSOR_TRACE_MAIN_LEAF_ECX ProcTraceEcx; CPUID_INTEL_PROCESSOR_TRACE_MAIN_LEAF_EBX ProcTraceEbx; // // Check if ProcTraceMemorySize option is enabled (0xFF means disable by user) // ProcTraceData = (PROC_TRACE_DATA *)ConfigData; ASSERT (ProcTraceData != NULL); if ((ProcTraceData->ProcTraceMemSize > RtitTopaMemorySize128M) || (ProcTraceData->ProcTraceOutputScheme > RtitOutputSchemeToPA)) { return FALSE; } // // Check if Processor Trace is supported // AsmCpuidEx (CPUID_STRUCTURED_EXTENDED_FEATURE_FLAGS, 0, NULL, &Ebx.Uint32, NULL, NULL); if (Ebx.Bits.IntelProcessorTrace == 0) { return FALSE; } AsmCpuidEx (CPUID_INTEL_PROCESSOR_TRACE, CPUID_INTEL_PROCESSOR_TRACE_MAIN_LEAF, NULL, &ProcTraceEbx.Uint32, &ProcTraceEcx.Uint32, NULL); ProcTraceData->ProcessorData[ProcessorNumber].TopaSupported = (BOOLEAN)(ProcTraceEcx.Bits.RTIT == 1); ProcTraceData->ProcessorData[ProcessorNumber].SingleRangeSupported = (BOOLEAN)(ProcTraceEcx.Bits.SingleRangeOutput == 1); if ((ProcTraceData->ProcessorData[ProcessorNumber].TopaSupported && (ProcTraceData->ProcTraceOutputScheme == RtitOutputSchemeToPA)) || (ProcTraceData->ProcessorData[ProcessorNumber].SingleRangeSupported && (ProcTraceData->ProcTraceOutputScheme == RtitOutputSchemeSingleRange))) { ProcTraceData->ProcessorData[ProcessorNumber].RtitCtrl.Uint64 = AsmReadMsr64 (MSR_IA32_RTIT_CTL); ProcTraceData->ProcessorData[ProcessorNumber].RtitOutputBase.Uint64 = AsmReadMsr64 (MSR_IA32_RTIT_OUTPUT_BASE); ProcTraceData->ProcessorData[ProcessorNumber].RtitOutputMaskPtrs.Uint64 = AsmReadMsr64 (MSR_IA32_RTIT_OUTPUT_MASK_PTRS); ProcTraceData->ProcessorData[ProcessorNumber].CycPacketSupported = (BOOLEAN)(ProcTraceEbx.Bits.ConfigurablePsb == 1); return TRUE; } return FALSE; } /** Initializes Intel Processor Trace feature to specific state. @param[in] ProcessorNumber The index of the CPU executing this function. @param[in] CpuInfo A pointer to the REGISTER_CPU_FEATURE_INFORMATION structure for the CPU executing this function. @param[in] ConfigData A pointer to the configuration buffer returned by CPU_FEATURE_GET_CONFIG_DATA. NULL if CPU_FEATURE_GET_CONFIG_DATA was not provided in RegisterCpuFeature(). @param[in] State If TRUE, then the Processor Trace feature must be enabled. If FALSE, then the Processor Trace feature must be disabled. @retval RETURN_SUCCESS Intel Processor Trace feature is initialized. **/ RETURN_STATUS EFIAPI ProcTraceInitialize ( IN UINTN ProcessorNumber, IN REGISTER_CPU_FEATURE_INFORMATION *CpuInfo, IN VOID *ConfigData OPTIONAL, IN BOOLEAN State ) { UINT32 MemRegionSize; UINTN Pages; UINTN Alignment; UINTN MemRegionBaseAddr; UINTN *ThreadMemRegionTable; UINTN Index; UINTN TopaTableBaseAddr; UINTN AlignedAddress; UINTN *TopaMemArray; PROC_TRACE_TOPA_TABLE *TopaTable; PROC_TRACE_DATA *ProcTraceData; BOOLEAN FirstIn; MSR_IA32_RTIT_CTL_REGISTER CtrlReg; MSR_IA32_RTIT_STATUS_REGISTER StatusReg; MSR_IA32_RTIT_OUTPUT_BASE_REGISTER OutputBaseReg; MSR_IA32_RTIT_OUTPUT_MASK_PTRS_REGISTER OutputMaskPtrsReg; RTIT_TOPA_TABLE_ENTRY *TopaEntryPtr; BOOLEAN IsBsp; // // The scope of the MSR_IA32_RTIT_* is core for below processor type, only program // MSR_IA32_RTIT_* for thread 0 in each core. // if (IS_GOLDMONT_PROCESSOR (CpuInfo->DisplayFamily, CpuInfo->DisplayModel) || IS_GOLDMONT_PLUS_PROCESSOR (CpuInfo->DisplayFamily, CpuInfo->DisplayModel)) { if (CpuInfo->ProcessorInfo.Location.Thread != 0) { return RETURN_SUCCESS; } } ProcTraceData = (PROC_TRACE_DATA *)ConfigData; ASSERT (ProcTraceData != NULL); // // Clear MSR_IA32_RTIT_CTL[0] and IA32_RTIT_STS only if MSR_IA32_RTIT_CTL[0]==1b // CtrlReg.Uint64 = ProcTraceData->ProcessorData[ProcessorNumber].RtitCtrl.Uint64; if (CtrlReg.Bits.TraceEn != 0) { /// /// Clear bit 0 in MSR IA32_RTIT_CTL (570) /// CtrlReg.Bits.TraceEn = 0; CPU_REGISTER_TABLE_WRITE64 ( ProcessorNumber, Msr, MSR_IA32_RTIT_CTL, CtrlReg.Uint64 ); /// /// Clear MSR IA32_RTIT_STS (571h) to all zeros /// StatusReg.Uint64 = 0x0; CPU_REGISTER_TABLE_WRITE64 ( ProcessorNumber, Msr, MSR_IA32_RTIT_STATUS, StatusReg.Uint64 ); } if (!State) { return RETURN_SUCCESS; } IsBsp = (CpuInfo->ProcessorInfo.StatusFlag & PROCESSOR_AS_BSP_BIT) ? TRUE : FALSE; if (ProcTraceData->EnableOnBspOnly && !IsBsp) { return RETURN_SUCCESS; } MemRegionBaseAddr = 0; FirstIn = FALSE; if (ProcTraceData->ThreadMemRegionTable == NULL) { FirstIn = TRUE; DEBUG ((DEBUG_INFO, "Initialize Processor Trace\n")); } /// /// Refer to PROC_TRACE_MEM_SIZE Table for Size Encoding /// MemRegionSize = (UINT32)(1 << (ProcTraceData->ProcTraceMemSize + 12)); if (FirstIn) { DEBUG ((DEBUG_INFO, "ProcTrace: MemSize requested: 0x%X \n", MemRegionSize)); } if (FirstIn) { // // Let BSP allocate and create the necessary memory region (Aligned to the size of // the memory region from setup option(ProcTraceMemSize) which is an integral multiple of 4kB) // for all the enabled threads to store Processor Trace debug data. Then Configure the trace // address base in MSR, IA32_RTIT_OUTPUT_BASE (560h) bits 47:12. Note that all regions must be // aligned based on their size, not just 4K. Thus a 2M region must have bits 20:12 cleared. // Pages = EFI_SIZE_TO_PAGES (MemRegionSize); Alignment = MemRegionSize; if (ProcTraceData->EnableOnBspOnly) { // // When only enable ProcTrace on BSP, this is the first and only time ProcTraceInitialize() runs. // MemRegionBaseAddr = (UINTN)AllocateAlignedReservedPages (Pages, Alignment); if (MemRegionBaseAddr == 0) { // // Could not allocate for BSP even // DEBUG ((DEBUG_ERROR, "ProcTrace: Out of mem, failed to allocate buffer for BSP\n")); return RETURN_OUT_OF_RESOURCES; } DEBUG ((DEBUG_INFO, "ProcTrace: Allocated PT MemRegionBaseAddr(aligned) for BSP only: 0x%llX.\n", (UINT64)MemRegionBaseAddr)); } else { ThreadMemRegionTable = (UINTN *)AllocatePool (ProcTraceData->NumberOfProcessors * sizeof (UINTN *)); if (ThreadMemRegionTable == NULL) { DEBUG ((DEBUG_ERROR, "Allocate ProcTrace ThreadMemRegionTable Failed\n")); return RETURN_OUT_OF_RESOURCES; } ProcTraceData->ThreadMemRegionTable = ThreadMemRegionTable; for (Index = 0; Index < ProcTraceData->NumberOfProcessors; Index++, ProcTraceData->AllocatedThreads++) { AlignedAddress = (UINTN)AllocateAlignedReservedPages (Pages, Alignment); if (AlignedAddress == 0) { DEBUG ((DEBUG_ERROR, "ProcTrace: Out of mem, allocated only for %d threads\n", ProcTraceData->AllocatedThreads)); if (Index == 0) { // // Could not allocate for BSP even // FreePool ((VOID *)ThreadMemRegionTable); ThreadMemRegionTable = NULL; return RETURN_OUT_OF_RESOURCES; } break; } ThreadMemRegionTable[Index] = AlignedAddress; DEBUG ((DEBUG_INFO, "ProcTrace: PT MemRegionBaseAddr(aligned) for thread %d: 0x%llX \n", Index, (UINT64)ThreadMemRegionTable[Index])); } DEBUG ((DEBUG_INFO, "ProcTrace: Allocated PT mem for %d thread \n", ProcTraceData->AllocatedThreads)); } } if (!ProcTraceData->EnableOnBspOnly) { if (ProcessorNumber < ProcTraceData->AllocatedThreads) { MemRegionBaseAddr = ProcTraceData->ThreadMemRegionTable[ProcessorNumber]; } else { return RETURN_SUCCESS; } } /// /// Check Processor Trace output scheme: Single Range output or ToPA table /// // // Single Range output scheme // if (ProcTraceData->ProcessorData[ProcessorNumber].SingleRangeSupported && (ProcTraceData->ProcTraceOutputScheme == RtitOutputSchemeSingleRange)) { if (FirstIn) { DEBUG ((DEBUG_INFO, "ProcTrace: Enabling Single Range Output scheme \n")); } // // Clear MSR IA32_RTIT_CTL (0x570) ToPA (Bit 8) // CtrlReg.Bits.ToPA = 0; CPU_REGISTER_TABLE_WRITE64 ( ProcessorNumber, Msr, MSR_IA32_RTIT_CTL, CtrlReg.Uint64 ); // // Program MSR IA32_RTIT_OUTPUT_BASE (0x560) bits[63:7] with the allocated Memory Region // OutputBaseReg.Uint64 = ProcTraceData->ProcessorData[ProcessorNumber].RtitOutputBase.Uint64; OutputBaseReg.Bits.Base = (MemRegionBaseAddr >> 7) & 0x01FFFFFF; OutputBaseReg.Bits.BaseHi = RShiftU64 ((UINT64)MemRegionBaseAddr, 32) & 0xFFFFFFFF; CPU_REGISTER_TABLE_WRITE64 ( ProcessorNumber, Msr, MSR_IA32_RTIT_OUTPUT_BASE, OutputBaseReg.Uint64 ); // // Program the Mask bits for the Memory Region to MSR IA32_RTIT_OUTPUT_MASK_PTRS (561h) // OutputMaskPtrsReg.Uint64 = ProcTraceData->ProcessorData[ProcessorNumber].RtitOutputMaskPtrs.Uint64; OutputMaskPtrsReg.Bits.MaskOrTableOffset = ((MemRegionSize - 1) >> 7) & 0x01FFFFFF; OutputMaskPtrsReg.Bits.OutputOffset = RShiftU64 (MemRegionSize - 1, 32) & 0xFFFFFFFF; CPU_REGISTER_TABLE_WRITE64 ( ProcessorNumber, Msr, MSR_IA32_RTIT_OUTPUT_MASK_PTRS, OutputMaskPtrsReg.Uint64 ); } // // ToPA(Table of physical address) scheme // if (ProcTraceData->ProcessorData[ProcessorNumber].TopaSupported && (ProcTraceData->ProcTraceOutputScheme == RtitOutputSchemeToPA)) { // // Create ToPA structure aligned at 4KB for each logical thread // with at least 2 entries by 8 bytes size each. The first entry // should have the trace output base address in bits 47:12, 6:9 // for Size, bits 4,2 and 0 must be cleared. The second entry // should have the base address of the table location in bits // 47:12, bits 4 and 2 must be cleared and bit 0 must be set. // if (FirstIn) { DEBUG ((DEBUG_INFO, "ProcTrace: Enabling ToPA scheme \n")); Pages = EFI_SIZE_TO_PAGES (sizeof (PROC_TRACE_TOPA_TABLE)); Alignment = 0x1000; if (ProcTraceData->EnableOnBspOnly) { // // When only enable ProcTrace on BSP, this is the first and only time ProcTraceInitialize() runs. // TopaTableBaseAddr = (UINTN)AllocateAlignedReservedPages (Pages, Alignment); if (TopaTableBaseAddr == 0) { DEBUG ((DEBUG_ERROR, "ProcTrace: Out of mem, failed to allocate ToPA mem for BSP")); return RETURN_OUT_OF_RESOURCES; } DEBUG ((DEBUG_INFO, "ProcTrace: Topa table address(aligned) for BSP only: 0x%llX \n", (UINT64)TopaTableBaseAddr)); } else { // // Let BSP allocate ToPA table mem for all threads // TopaMemArray = (UINTN *)AllocatePool (ProcTraceData->AllocatedThreads * sizeof (UINTN *)); if (TopaMemArray == NULL) { DEBUG ((DEBUG_ERROR, "ProcTrace: Allocate mem for ToPA Failed\n")); return RETURN_OUT_OF_RESOURCES; } ProcTraceData->TopaMemArray = TopaMemArray; for (Index = 0; Index < ProcTraceData->AllocatedThreads; Index++) { AlignedAddress = (UINTN)AllocateAlignedReservedPages (Pages, Alignment); if (AlignedAddress == 0) { if (Index < ProcTraceData->AllocatedThreads) { ProcTraceData->AllocatedThreads = Index; } DEBUG ((DEBUG_ERROR, "ProcTrace: Out of mem, allocated ToPA mem only for %d threads\n", ProcTraceData->AllocatedThreads)); if (Index == 0) { // // Could not allocate for BSP even // FreePool ((VOID *)TopaMemArray); TopaMemArray = NULL; return RETURN_OUT_OF_RESOURCES; } break; } TopaMemArray[Index] = AlignedAddress; DEBUG ((DEBUG_INFO, "ProcTrace: Topa table address(aligned) for thread %d is 0x%llX \n", Index, (UINT64)TopaMemArray[Index])); } DEBUG ((DEBUG_INFO, "ProcTrace: Allocated ToPA mem for %d thread \n", ProcTraceData->AllocatedThreads)); } } if (!ProcTraceData->EnableOnBspOnly) { if (ProcessorNumber < ProcTraceData->AllocatedThreads) { TopaTableBaseAddr = ProcTraceData->TopaMemArray[ProcessorNumber]; } else { return RETURN_SUCCESS; } } TopaTable = (PROC_TRACE_TOPA_TABLE *)TopaTableBaseAddr; TopaEntryPtr = &TopaTable->TopaEntry[0]; TopaEntryPtr->Uint64 = 0; TopaEntryPtr->Bits.Base = (MemRegionBaseAddr >> 12) & 0x000FFFFF; TopaEntryPtr->Bits.BaseHi = RShiftU64 ((UINT64)MemRegionBaseAddr, 32) & 0xFFFFFFFF; TopaEntryPtr->Bits.Size = ProcTraceData->ProcTraceMemSize; TopaEntryPtr->Bits.END = 0; TopaEntryPtr = &TopaTable->TopaEntry[1]; TopaEntryPtr->Uint64 = 0; TopaEntryPtr->Bits.Base = (TopaTableBaseAddr >> 12) & 0x000FFFFF; TopaEntryPtr->Bits.BaseHi = RShiftU64 ((UINT64)TopaTableBaseAddr, 32) & 0xFFFFFFFF; TopaEntryPtr->Bits.END = 1; // // Program the MSR IA32_RTIT_OUTPUT_BASE (0x560) bits[63:7] with ToPA base // OutputBaseReg.Uint64 = ProcTraceData->ProcessorData[ProcessorNumber].RtitOutputBase.Uint64; OutputBaseReg.Bits.Base = (TopaTableBaseAddr >> 7) & 0x01FFFFFF; OutputBaseReg.Bits.BaseHi = RShiftU64 ((UINT64)TopaTableBaseAddr, 32) & 0xFFFFFFFF; CPU_REGISTER_TABLE_WRITE64 ( ProcessorNumber, Msr, MSR_IA32_RTIT_OUTPUT_BASE, OutputBaseReg.Uint64 ); // // Set the MSR IA32_RTIT_OUTPUT_MASK (0x561) bits[63:7] to 0 // OutputMaskPtrsReg.Uint64 = ProcTraceData->ProcessorData[ProcessorNumber].RtitOutputMaskPtrs.Uint64; OutputMaskPtrsReg.Bits.MaskOrTableOffset = 0; OutputMaskPtrsReg.Bits.OutputOffset = 0; CPU_REGISTER_TABLE_WRITE64 ( ProcessorNumber, Msr, MSR_IA32_RTIT_OUTPUT_MASK_PTRS, OutputMaskPtrsReg.Uint64 ); // // Enable ToPA output scheme by enabling MSR IA32_RTIT_CTL (0x570) ToPA (Bit 8) // CtrlReg.Bits.ToPA = 1; CPU_REGISTER_TABLE_WRITE64 ( ProcessorNumber, Msr, MSR_IA32_RTIT_CTL, CtrlReg.Uint64 ); } /// /// Enable the Processor Trace feature from MSR IA32_RTIT_CTL (570h) /// CtrlReg.Bits.OS = 1; CtrlReg.Bits.User = 1; CtrlReg.Bits.BranchEn = 1; CtrlReg.Bits.TraceEn = 1; // // Generate CYC/TSC timing packets to collect performance data. // if (ProcTraceData->EnablePerformanceCollecting) { if (ProcTraceData->ProcessorData[ProcessorNumber].CycPacketSupported) { CtrlReg.Bits.CYCEn = 1; CtrlReg.Bits.CYCThresh = 5; } // // Write to TSCEn is always supported // CtrlReg.Bits.TSCEn = 1; } CPU_REGISTER_TABLE_WRITE64 ( ProcessorNumber, Msr, MSR_IA32_RTIT_CTL, CtrlReg.Uint64 ); return RETURN_SUCCESS; }