]> git.proxmox.com Git - mirror_edk2.git/blobdiff - UefiCpuPkg/Library/RegisterCpuFeaturesLib/CpuFeaturesInitialize.c
UefiCpuPkg: Refactor initialization of CPU features during S3 resume
[mirror_edk2.git] / UefiCpuPkg / Library / RegisterCpuFeaturesLib / CpuFeaturesInitialize.c
index e91a4388b4ec70beeda2995174d6a47117754829..6e2ab7951821b76444b1597e015c9770e7d39053 100644 (file)
@@ -1,33 +1,30 @@
 /** @file\r
   CPU Features Initialize functions.\r
 \r
-  Copyright (c) 2017, Intel Corporation. All rights reserved.<BR>\r
-  This program and the accompanying materials\r
-  are licensed and made available under the terms and conditions of the BSD License\r
-  which accompanies this distribution.  The full text of the license may be found at\r
-  http://opensource.org/licenses/bsd-license.php\r
-\r
-  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
-  WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
+  Copyright (c) 2017 - 2021, Intel Corporation. All rights reserved.<BR>\r
+  SPDX-License-Identifier: BSD-2-Clause-Patent\r
 \r
 **/\r
 \r
 #include "RegisterCpuFeatures.h"\r
 \r
+CHAR16 *mDependTypeStr[]   = {L"None", L"Thread", L"Core", L"Package", L"Invalid" };\r
+\r
 /**\r
   Worker function to save PcdCpuFeaturesCapability.\r
 \r
   @param[in]  SupportedFeatureMask  The pointer to CPU feature bits mask buffer\r
+  @param[in]  BitMaskSize           Size of the CPU feature bits mask buffer; its address is passed to PcdSetPtrS ().\r
+\r
 **/\r
 VOID\r
 SetCapabilityPcd (\r
-  IN UINT8               *SupportedFeatureMask\r
+  IN UINT8               *SupportedFeatureMask,\r
+  IN UINTN               BitMaskSize\r
   )\r
 {\r
   EFI_STATUS             Status;\r
-  UINTN                  BitMaskSize;\r
 \r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesCapability);\r
   Status = PcdSetPtrS (PcdCpuFeaturesCapability, &BitMaskSize, SupportedFeatureMask);\r
   ASSERT_EFI_ERROR (Status);\r
 }\r
@@ -36,62 +33,20 @@ SetCapabilityPcd (
   Worker function to save PcdCpuFeaturesSetting.\r
 \r
   @param[in]  SupportedFeatureMask  The pointer to CPU feature bits mask buffer\r
+  @param[in]  BitMaskSize           Size of the CPU feature bits mask buffer; its address is passed to PcdSetPtrS ().\r
 **/\r
 VOID\r
 SetSettingPcd (\r
-  IN UINT8               *SupportedFeatureMask\r
+  IN UINT8               *SupportedFeatureMask,\r
+  IN UINTN               BitMaskSize\r
   )\r
 {\r
   EFI_STATUS             Status;\r
-  UINTN                  BitMaskSize;\r
 \r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesSetting);\r
   Status = PcdSetPtrS (PcdCpuFeaturesSetting, &BitMaskSize, SupportedFeatureMask);\r
   ASSERT_EFI_ERROR (Status);\r
 }\r
 \r
-/**\r
-  Worker function to get PcdCpuFeaturesSupport.\r
-\r
-  @return  The pointer to CPU feature bits mask buffer.\r
-**/\r
-UINT8 *\r
-GetSupportPcds (\r
-  VOID\r
-  )\r
-{\r
-  UINTN                  BitMaskSize;\r
-  UINT8                  *SupportBitMask;\r
-\r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesSupport);\r
-  SupportBitMask = AllocateZeroPool (BitMaskSize);\r
-  ASSERT (SupportBitMask != NULL);\r
-  SupportBitMask = (UINT8 *) PcdGetPtr (PcdCpuFeaturesSupport);\r
-\r
-  return SupportBitMask;\r
-}\r
-\r
-/**\r
-  Worker function to get PcdCpuFeaturesUserConfiguration.\r
-\r
-  @return  The pointer to CPU feature bits mask buffer.\r
-**/\r
-UINT8 *\r
-GetConfigurationPcds (\r
-  VOID\r
-  )\r
-{\r
-  UINTN                  BitMaskSize;\r
-  UINT8                  *SupportBitMask;\r
-\r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesUserConfiguration);\r
-  SupportBitMask = AllocateZeroPool (BitMaskSize);\r
-  ASSERT (SupportBitMask != NULL);\r
-  SupportBitMask = (UINT8 *) PcdGetPtr (PcdCpuFeaturesUserConfiguration);\r
-\r
-  return SupportBitMask;\r
-}\r
-\r
 /**\r
   Collects CPU type and feature information.\r
 \r
@@ -131,11 +86,10 @@ FillProcessorInfo (
 /**\r
   Prepares for private data used for CPU features.\r
 \r
-  @param[in]  NumberOfCpus  Number of processor in system\r
 **/\r
 VOID\r
 CpuInitDataInitialize (\r
-  IN UINTN                             NumberOfCpus\r
+  VOID\r
   )\r
 {\r
   EFI_STATUS                           Status;\r
@@ -145,11 +99,39 @@ CpuInitDataInitialize (
   CPU_FEATURES_INIT_ORDER              *InitOrder;\r
   CPU_FEATURES_DATA                    *CpuFeaturesData;\r
   LIST_ENTRY                           *Entry;\r
+  UINT32                               Core;\r
+  UINT32                               Package;\r
+  UINT32                               Thread;\r
+  EFI_CPU_PHYSICAL_LOCATION            *Location;\r
+  UINT32                               PackageIndex;\r
+  UINT32                               CoreIndex;\r
+  UINTN                                Pages;\r
+  UINT32                               FirstPackage;\r
+  UINT32                               *FirstCore;\r
+  UINT32                               *FirstThread;\r
+  ACPI_CPU_DATA                        *AcpiCpuData;\r
+  CPU_STATUS_INFORMATION               *CpuStatus;\r
+  UINT32                               *ThreadCountPerPackage;\r
+  UINT8                                *ThreadCountPerCore;\r
+  UINTN                                NumberOfCpus;\r
+  UINTN                                NumberOfEnabledProcessors;\r
+\r
+  Core    = 0;\r
+  Package = 0;\r
+  Thread  = 0;\r
 \r
   CpuFeaturesData = GetCpuFeaturesData ();\r
-  CpuFeaturesData->InitOrder = AllocateZeroPool (sizeof (CPU_FEATURES_INIT_ORDER) * NumberOfCpus);\r
+\r
+  //\r
+  // Initialize CpuFeaturesData->MpService as early as possible, so later functions can use it.\r
+  //\r
+  CpuFeaturesData->MpService = GetMpService ();\r
+\r
+  GetNumberOfProcessor (&NumberOfCpus, &NumberOfEnabledProcessors);\r
+\r
+  CpuFeaturesData->InitOrder = AllocatePages (EFI_SIZE_TO_PAGES (sizeof (CPU_FEATURES_INIT_ORDER) * NumberOfCpus));\r
   ASSERT (CpuFeaturesData->InitOrder != NULL);\r
-  CpuFeaturesData->BitMaskSize = PcdGetSize (PcdCpuFeaturesSupport);\r
+  ZeroMem (CpuFeaturesData->InitOrder, sizeof (CPU_FEATURES_INIT_ORDER) * NumberOfCpus);\r
 \r
   //\r
   // Collect CPU Features information\r
@@ -164,6 +146,17 @@ CpuInitDataInitialize (
     Entry = Entry->ForwardLink;\r
   }\r
 \r
+  CpuFeaturesData->NumberOfCpus = (UINT32) NumberOfCpus;\r
+\r
+  AcpiCpuData = GetAcpiCpuData ();\r
+  ASSERT (AcpiCpuData != NULL);\r
+  CpuFeaturesData->AcpiCpuData= AcpiCpuData;\r
+\r
+  CpuStatus = &AcpiCpuData->CpuFeatureInitData.CpuStatus;\r
+  Location = AllocateZeroPool (sizeof (EFI_CPU_PHYSICAL_LOCATION) * NumberOfCpus);\r
+  ASSERT (Location != NULL);\r
+  AcpiCpuData->CpuFeatureInitData.ApLocation = (EFI_PHYSICAL_ADDRESS)(UINTN)Location;\r
+\r
   for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {\r
     InitOrder = &CpuFeaturesData->InitOrder[ProcessorNumber];\r
     InitOrder->FeaturesSupportedMask = AllocateZeroPool (CpuFeaturesData->BitMaskSize);\r
@@ -176,12 +169,140 @@ CpuInitDataInitialize (
       &ProcessorInfoBuffer,\r
       sizeof (EFI_PROCESSOR_INFORMATION)\r
       );\r
+    CopyMem (\r
+      &Location[ProcessorNumber],\r
+      &ProcessorInfoBuffer.Location,\r
+      sizeof (EFI_CPU_PHYSICAL_LOCATION)\r
+      );\r
+\r
+    //\r
+    // Collect CPU package count info.\r
+    //\r
+    if (Package < ProcessorInfoBuffer.Location.Package) {\r
+      Package = ProcessorInfoBuffer.Location.Package;\r
+    }\r
+    //\r
+    // Collect CPU max core count info.\r
+    //\r
+    if (Core < ProcessorInfoBuffer.Location.Core) {\r
+      Core = ProcessorInfoBuffer.Location.Core;\r
+    }\r
+    //\r
+    // Collect CPU max thread count info.\r
+    //\r
+    if (Thread < ProcessorInfoBuffer.Location.Thread) {\r
+      Thread = ProcessorInfoBuffer.Location.Thread;\r
+    }\r
+  }\r
+  CpuStatus->PackageCount    = Package + 1;\r
+  CpuStatus->MaxCoreCount    = Core + 1;\r
+  CpuStatus->MaxThreadCount  = Thread + 1;\r
+  DEBUG ((DEBUG_INFO, "Processor Info: Package: %d, MaxCore : %d, MaxThread: %d\n",\r
+         CpuStatus->PackageCount,\r
+         CpuStatus->MaxCoreCount,\r
+         CpuStatus->MaxThreadCount));\r
+\r
+  //\r
+  // Collect the thread count of each package and of each core, because not all cores are valid.\r
+  //\r
+  ThreadCountPerPackage = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount);\r
+  ASSERT (ThreadCountPerPackage != NULL);\r
+  CpuStatus->ThreadCountPerPackage = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerPackage;\r
+\r
+  ThreadCountPerCore = AllocateZeroPool (sizeof (UINT8) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount);\r
+  ASSERT (ThreadCountPerCore != NULL);\r
+  CpuStatus->ThreadCountPerCore = (EFI_PHYSICAL_ADDRESS)(UINTN)ThreadCountPerCore;\r
+\r
+  for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {\r
+    Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;\r
+    ThreadCountPerPackage[Location->Package]++;\r
+    ThreadCountPerCore[Location->Package * CpuStatus->MaxCoreCount + Location->Core]++;\r
   }\r
+\r
+  for (PackageIndex = 0; PackageIndex < CpuStatus->PackageCount; PackageIndex++) {\r
+    if (ThreadCountPerPackage[PackageIndex] != 0) {\r
+      DEBUG ((DEBUG_INFO, "P%02d: Thread Count = %d\n", PackageIndex, ThreadCountPerPackage[PackageIndex]));\r
+      for (CoreIndex = 0; CoreIndex < CpuStatus->MaxCoreCount; CoreIndex++) {\r
+        if (ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex] != 0) {\r
+          DEBUG ((\r
+            DEBUG_INFO, "  P%02d C%04d, Thread Count = %d\n", PackageIndex, CoreIndex,\r
+            ThreadCountPerCore[PackageIndex * CpuStatus->MaxCoreCount + CoreIndex]\r
+            ));\r
+        }\r
+      }\r
+    }\r
+  }\r
+\r
+  CpuFeaturesData->CpuFlags.CoreSemaphoreCount = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount * CpuStatus->MaxThreadCount);\r
+  ASSERT (CpuFeaturesData->CpuFlags.CoreSemaphoreCount != NULL);\r
+  CpuFeaturesData->CpuFlags.PackageSemaphoreCount = AllocateZeroPool (sizeof (UINT32) * CpuStatus->PackageCount * CpuStatus->MaxCoreCount * CpuStatus->MaxThreadCount);\r
+  ASSERT (CpuFeaturesData->CpuFlags.PackageSemaphoreCount != NULL);\r
+\r
+  //\r
+  // Initialize CpuFeaturesData->InitOrder[].CpuInfo.First\r
+  // Use AllocatePages () instead of AllocatePool () because pool cannot be freed in PEI phase but page can.\r
   //\r
-  // Get support and configuration PCDs\r
+  Pages     = EFI_SIZE_TO_PAGES (CpuStatus->PackageCount * sizeof (UINT32) + CpuStatus->PackageCount * CpuStatus->MaxCoreCount * sizeof (UINT32));\r
+  FirstCore = AllocatePages (Pages);\r
+  ASSERT (FirstCore != NULL);\r
+  FirstThread  = FirstCore + CpuStatus->PackageCount;\r
+\r
+  //\r
+  // Set FirstPackage, FirstCore[], FirstThread[] to maximum package ID, core ID, thread ID.\r
   //\r
-  CpuFeaturesData->SupportPcds       = GetSupportPcds ();\r
-  CpuFeaturesData->ConfigurationPcds = GetConfigurationPcds ();\r
+  FirstPackage = MAX_UINT32;\r
+  SetMem32 (FirstCore,   CpuStatus->PackageCount * sizeof (UINT32), MAX_UINT32);\r
+  SetMem32 (FirstThread, CpuStatus->PackageCount * CpuStatus->MaxCoreCount * sizeof (UINT32), MAX_UINT32);\r
+\r
+  for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {\r
+    Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;\r
+\r
+    //\r
+    // Save the minimum package ID in the platform.\r
+    //\r
+    FirstPackage                 = MIN (Location->Package, FirstPackage);\r
+\r
+    //\r
+    // Save the minimum core ID per package.\r
+    //\r
+    FirstCore[Location->Package] = MIN (Location->Core, FirstCore[Location->Package]);\r
+\r
+    //\r
+    // Save the minimum thread ID per core.\r
+    //\r
+    FirstThread[Location->Package * CpuStatus->MaxCoreCount + Location->Core] = MIN (\r
+      Location->Thread,\r
+      FirstThread[Location->Package * CpuStatus->MaxCoreCount + Location->Core]\r
+    );\r
+  }\r
+\r
+  //\r
+  // Update the First field.\r
+  //\r
+  for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {\r
+    Location = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.ProcessorInfo.Location;\r
+\r
+    if (Location->Package == FirstPackage) {\r
+      CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.First.Package = 1;\r
+    }\r
+\r
+    //\r
+    // Set First.Die/Tile/Module for each thread assuming:\r
+    //  single Die under each package, single Tile under each Die, single Module under each Tile\r
+    //\r
+    CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.First.Die = 1;\r
+    CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.First.Tile = 1;\r
+    CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.First.Module = 1;\r
+\r
+    if (Location->Core == FirstCore[Location->Package]) {\r
+      CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.First.Core = 1;\r
+    }\r
+    if (Location->Thread == FirstThread[Location->Package * CpuStatus->MaxCoreCount + Location->Core]) {\r
+      CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo.First.Thread = 1;\r
+    }\r
+  }\r
+\r
+  FreePages (FirstCore, Pages);\r
 }\r
 \r
 /**\r
@@ -189,19 +310,20 @@ CpuInitDataInitialize (
 \r
   @param[in]  SupportedFeatureMask  The pointer to CPU feature bits mask buffer\r
   @param[in]  OrFeatureBitMask      The feature bit mask to do OR operation\r
+  @param[in]  BitMaskSize           The CPU feature bits mask buffer size.\r
+\r
 **/\r
 VOID\r
 SupportedMaskOr (\r
   IN UINT8               *SupportedFeatureMask,\r
-  IN UINT8               *OrFeatureBitMask\r
+  IN UINT8               *OrFeatureBitMask,\r
+  IN UINT32              BitMaskSize\r
   )\r
 {\r
   UINTN                  Index;\r
-  UINTN                  BitMaskSize;\r
   UINT8                  *Data1;\r
   UINT8                  *Data2;\r
 \r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesSupport);\r
   Data1 = SupportedFeatureMask;\r
   Data2 = OrFeatureBitMask;\r
   for (Index = 0; Index < BitMaskSize; Index++) {\r
@@ -214,19 +336,20 @@ SupportedMaskOr (
 \r
   @param[in]  SupportedFeatureMask  The pointer to CPU feature bits mask buffer\r
   @param[in]  AndFeatureBitMask     The feature bit mask to do AND operation\r
+  @param[in]  BitMaskSize           CPU feature bits mask buffer size.\r
+\r
 **/\r
 VOID\r
 SupportedMaskAnd (\r
-  IN UINT8               *SupportedFeatureMask,\r
-  IN UINT8               *AndFeatureBitMask\r
+  IN       UINT8               *SupportedFeatureMask,\r
+  IN CONST UINT8               *AndFeatureBitMask,\r
+  IN       UINT32              BitMaskSize\r
   )\r
 {\r
   UINTN                  Index;\r
-  UINTN                  BitMaskSize;\r
   UINT8                  *Data1;\r
-  UINT8                  *Data2;\r
+  CONST UINT8            *Data2;\r
 \r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesSupport);\r
   Data1 = SupportedFeatureMask;\r
   Data2 = AndFeatureBitMask;\r
   for (Index = 0; Index < BitMaskSize; Index++) {\r
@@ -239,19 +362,19 @@ SupportedMaskAnd (
 \r
   @param[in]  SupportedFeatureMask  The pointer to CPU feature bits mask buffer\r
   @param[in]  AndFeatureBitMask     The feature bit mask to do XOR operation\r
+  @param[in]  BitMaskSize           CPU feature bits mask buffer size.\r
 **/\r
 VOID\r
 SupportedMaskCleanBit (\r
   IN UINT8               *SupportedFeatureMask,\r
-  IN UINT8               *AndFeatureBitMask\r
+  IN UINT8               *AndFeatureBitMask,\r
+  IN UINT32              BitMaskSize\r
   )\r
 {\r
   UINTN                  Index;\r
-  UINTN                  BitMaskSize;\r
   UINT8                  *Data1;\r
   UINT8                  *Data2;\r
 \r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesSupport);\r
   Data1 = SupportedFeatureMask;\r
   Data2 = AndFeatureBitMask;\r
   for (Index = 0; Index < BitMaskSize; Index++) {\r
@@ -265,6 +388,7 @@ SupportedMaskCleanBit (
 \r
   @param[in]  SupportedFeatureMask   The pointer to CPU feature bits mask buffer\r
   @param[in]  ComparedFeatureBitMask The feature bit mask to be compared\r
+  @param[in]  BitMaskSize            CPU feature bits mask buffer size.\r
 \r
   @retval TRUE   The ComparedFeatureBitMask is set in CPU feature supported bits\r
                  mask buffer.\r
@@ -274,16 +398,14 @@ SupportedMaskCleanBit (
 BOOLEAN\r
 IsBitMaskMatch (\r
   IN UINT8               *SupportedFeatureMask,\r
-  IN UINT8               *ComparedFeatureBitMask\r
+  IN UINT8               *ComparedFeatureBitMask,\r
+  IN UINT32              BitMaskSize\r
   )\r
 {\r
   UINTN                  Index;\r
-  UINTN                  BitMaskSize;\r
   UINT8                  *Data1;\r
   UINT8                  *Data2;\r
 \r
-  BitMaskSize = PcdGetSize (PcdCpuFeaturesSupport);\r
-\r
   Data1 = SupportedFeatureMask;\r
   Data2 = ComparedFeatureBitMask;\r
   for (Index = 0; Index < BitMaskSize; Index++) {\r
@@ -311,8 +433,8 @@ CollectProcessorData (
   LIST_ENTRY                           *Entry;\r
   CPU_FEATURES_DATA                    *CpuFeaturesData;\r
 \r
-  CpuFeaturesData = GetCpuFeaturesData ();\r
-  ProcessorNumber = GetProcessorIndex ();\r
+  CpuFeaturesData = (CPU_FEATURES_DATA *)Buffer;\r
+  ProcessorNumber = GetProcessorIndex (CpuFeaturesData);\r
   CpuInfo = &CpuFeaturesData->InitOrder[ProcessorNumber].CpuInfo;\r
   //\r
   // collect processor information\r
@@ -321,21 +443,21 @@ CollectProcessorData (
   Entry = GetFirstNode (&CpuFeaturesData->FeatureList);\r
   while (!IsNull (&CpuFeaturesData->FeatureList, Entry)) {\r
     CpuFeature = CPU_FEATURE_ENTRY_FROM_LINK (Entry);\r
-    if (IsBitMaskMatch (CpuFeaturesData->SupportPcds, CpuFeature->FeatureMask)) {\r
-      if (CpuFeature->SupportFunc == NULL) {\r
-        //\r
-        // If SupportFunc is NULL, then the feature is supported.\r
-        //\r
-        SupportedMaskOr (\r
-          CpuFeaturesData->InitOrder[ProcessorNumber].FeaturesSupportedMask,\r
-          CpuFeature->FeatureMask\r
-          );\r
-      } else if (CpuFeature->SupportFunc (ProcessorNumber, CpuInfo, CpuFeature->ConfigData)) {\r
-        SupportedMaskOr (\r
-          CpuFeaturesData->InitOrder[ProcessorNumber].FeaturesSupportedMask,\r
-          CpuFeature->FeatureMask\r
-          );\r
-      }\r
+    if (CpuFeature->SupportFunc == NULL) {\r
+      //\r
+      // If SupportFunc is NULL, then the feature is supported.\r
+      //\r
+      SupportedMaskOr (\r
+        CpuFeaturesData->InitOrder[ProcessorNumber].FeaturesSupportedMask,\r
+        CpuFeature->FeatureMask,\r
+        CpuFeaturesData->BitMaskSize\r
+        );\r
+    } else if (CpuFeature->SupportFunc (ProcessorNumber, CpuInfo, CpuFeature->ConfigData)) {\r
+      SupportedMaskOr (\r
+        CpuFeaturesData->InitOrder[ProcessorNumber].FeaturesSupportedMask,\r
+        CpuFeature->FeatureMask,\r
+        CpuFeaturesData->BitMaskSize\r
+        );\r
     }\r
     Entry = Entry->ForwardLink;\r
   }\r
@@ -376,8 +498,9 @@ DumpRegisterTableOnProcessor (
     case Msr:\r
       DEBUG ((\r
         DebugPrintErrorLevel,\r
-        "Processor: %d:   MSR: %x, Bit Start: %d, Bit Length: %d, Value: %lx\r\n",\r
-        ProcessorNumber,\r
+        "Processor: %04d: Index %04d, MSR  : %08x, Bit Start: %02d, Bit Length: %02d, Value: %016lx\r\n",\r
+        (UINT32) ProcessorNumber,\r
+        (UINT32) FeatureIndex,\r
         RegisterTableEntry->Index,\r
         RegisterTableEntry->ValidBitStart,\r
         RegisterTableEntry->ValidBitLength,\r
@@ -387,8 +510,9 @@ DumpRegisterTableOnProcessor (
     case ControlRegister:\r
       DEBUG ((\r
         DebugPrintErrorLevel,\r
-        "Processor: %d:    CR: %x, Bit Start: %d, Bit Length: %d, Value: %lx\r\n",\r
-        ProcessorNumber,\r
+        "Processor: %04d: Index %04d, CR   : %08x, Bit Start: %02d, Bit Length: %02d, Value: %016lx\r\n",\r
+        (UINT32) ProcessorNumber,\r
+        (UINT32) FeatureIndex,\r
         RegisterTableEntry->Index,\r
         RegisterTableEntry->ValidBitStart,\r
         RegisterTableEntry->ValidBitLength,\r
@@ -398,8 +522,9 @@ DumpRegisterTableOnProcessor (
     case MemoryMapped:\r
       DEBUG ((\r
         DebugPrintErrorLevel,\r
-        "Processor: %d:  MMIO: %lx, Bit Start: %d, Bit Length: %d, Value: %lx\r\n",\r
-        ProcessorNumber,\r
+        "Processor: %04d: Index %04d, MMIO : %016lx, Bit Start: %02d, Bit Length: %02d, Value: %016lx\r\n",\r
+        (UINT32) ProcessorNumber,\r
+        (UINT32) FeatureIndex,\r
         RegisterTableEntry->Index | LShiftU64 (RegisterTableEntry->HighIndex, 32),\r
         RegisterTableEntry->ValidBitStart,\r
         RegisterTableEntry->ValidBitLength,\r
@@ -409,20 +534,57 @@ DumpRegisterTableOnProcessor (
     case CacheControl:\r
       DEBUG ((\r
         DebugPrintErrorLevel,\r
-        "Processor: %d: CACHE: %x, Bit Start: %d, Bit Length: %d, Value: %lx\r\n",\r
-        ProcessorNumber,\r
+        "Processor: %04d: Index %04d, CACHE: %08x, Bit Start: %02d, Bit Length: %02d, Value: %016lx\r\n",\r
+        (UINT32) ProcessorNumber,\r
+        (UINT32) FeatureIndex,\r
         RegisterTableEntry->Index,\r
         RegisterTableEntry->ValidBitStart,\r
         RegisterTableEntry->ValidBitLength,\r
         RegisterTableEntry->Value\r
         ));\r
       break;\r
+    case Semaphore:\r
+      DEBUG ((\r
+        DebugPrintErrorLevel,\r
+        "Processor: %04d: Index %04d, SEMAP: %s\r\n",\r
+        (UINT32) ProcessorNumber,\r
+        (UINT32) FeatureIndex,\r
+        mDependTypeStr[MIN ((UINT32)RegisterTableEntry->Value, InvalidDepType)]\r
+        ));\r
+      break;\r
+\r
     default:\r
       break;\r
     }\r
   }\r
 }\r
 \r
+/**\r
+  Get the biggest dependence type.\r
+  PackageDepType > CoreDepType > ThreadDepType > NoneDepType.\r
+\r
+  @param[in]  BeforeDep           Before dependence type.\r
+  @param[in]  AfterDep            After dependence type.\r
+  @param[in]  NoneNeibBeforeDep   Before dependence type for non-neighboring features.\r
+  @param[in]  NoneNeibAfterDep    After dependence type for non-neighboring features.\r
+\r
+  @return  The largest of the four dependence types passed in, compared with MAX ().\r
+**/\r
+CPU_FEATURE_DEPENDENCE_TYPE\r
+BiggestDep (\r
+  IN CPU_FEATURE_DEPENDENCE_TYPE  BeforeDep,\r
+  IN CPU_FEATURE_DEPENDENCE_TYPE  AfterDep,\r
+  IN CPU_FEATURE_DEPENDENCE_TYPE  NoneNeibBeforeDep,\r
+  IN CPU_FEATURE_DEPENDENCE_TYPE  NoneNeibAfterDep\r
+  )\r
+{\r
+  CPU_FEATURE_DEPENDENCE_TYPE Bigger;\r
+\r
+  Bigger = MAX (BeforeDep, AfterDep);\r
+  Bigger = MAX (Bigger, NoneNeibBeforeDep);\r
+  return MAX(Bigger, NoneNeibAfterDep);\r
+}\r
+\r
 /**\r
   Analysis register CPU features on each processor and save CPU setting in CPU register table.\r
 \r
@@ -442,31 +604,31 @@ AnalysisProcessorFeatures (
   REGISTER_CPU_FEATURE_INFORMATION     *CpuInfo;\r
   LIST_ENTRY                           *Entry;\r
   CPU_FEATURES_DATA                    *CpuFeaturesData;\r
+  LIST_ENTRY                           *NextEntry;\r
+  CPU_FEATURES_ENTRY                   *NextCpuFeatureInOrder;\r
+  BOOLEAN                              Success;\r
+  CPU_FEATURE_DEPENDENCE_TYPE          BeforeDep;\r
+  CPU_FEATURE_DEPENDENCE_TYPE          AfterDep;\r
+  CPU_FEATURE_DEPENDENCE_TYPE          NoneNeibBeforeDep;\r
+  CPU_FEATURE_DEPENDENCE_TYPE          NoneNeibAfterDep;\r
 \r
   CpuFeaturesData = GetCpuFeaturesData ();\r
-  CpuFeaturesData->CapabilityPcds = AllocatePool (CpuFeaturesData->BitMaskSize);\r
-  ASSERT (CpuFeaturesData->CapabilityPcds != NULL);\r
-  SetMem (CpuFeaturesData->CapabilityPcds, CpuFeaturesData->BitMaskSize, 0xFF);\r
+  CpuFeaturesData->CapabilityPcd = AllocatePool (CpuFeaturesData->BitMaskSize);\r
+  ASSERT (CpuFeaturesData->CapabilityPcd != NULL);\r
+  SetMem (CpuFeaturesData->CapabilityPcd, CpuFeaturesData->BitMaskSize, 0xFF);\r
   for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {\r
     CpuInitOrder = &CpuFeaturesData->InitOrder[ProcessorNumber];\r
     //\r
     // Calculate the last capability on all processors\r
     //\r
-    SupportedMaskAnd (CpuFeaturesData->CapabilityPcds, CpuInitOrder->FeaturesSupportedMask);\r
+    SupportedMaskAnd (CpuFeaturesData->CapabilityPcd, CpuInitOrder->FeaturesSupportedMask, CpuFeaturesData->BitMaskSize);\r
   }\r
   //\r
   // Calculate the last setting\r
   //\r
-\r
-  CpuFeaturesData->SettingPcds = AllocateCopyPool (CpuFeaturesData->BitMaskSize, CpuFeaturesData->CapabilityPcds);\r
-  ASSERT (CpuFeaturesData->SettingPcds != NULL);\r
-  SupportedMaskAnd (CpuFeaturesData->SettingPcds, CpuFeaturesData->ConfigurationPcds);\r
-\r
-  //\r
-  // Save PCDs and display CPU PCDs\r
-  //\r
-  SetCapabilityPcd (CpuFeaturesData->CapabilityPcds);\r
-  SetSettingPcd (CpuFeaturesData->SettingPcds);\r
+  CpuFeaturesData->SettingPcd = AllocateCopyPool (CpuFeaturesData->BitMaskSize, CpuFeaturesData->CapabilityPcd);\r
+  ASSERT (CpuFeaturesData->SettingPcd != NULL);\r
+  SupportedMaskAnd (CpuFeaturesData->SettingPcd, PcdGetPtr (PcdCpuFeaturesSetting), CpuFeaturesData->BitMaskSize);\r
 \r
   //\r
   // Dump the last CPU feature list\r
@@ -476,8 +638,8 @@ AnalysisProcessorFeatures (
     Entry = GetFirstNode (&CpuFeaturesData->FeatureList);\r
     while (!IsNull (&CpuFeaturesData->FeatureList, Entry)) {\r
       CpuFeature = CPU_FEATURE_ENTRY_FROM_LINK (Entry);\r
-      if (IsBitMaskMatch (CpuFeature->FeatureMask, CpuFeaturesData->CapabilityPcds)) {\r
-        if (IsBitMaskMatch (CpuFeature->FeatureMask, CpuFeaturesData->SettingPcds)) {\r
+      if (IsBitMaskMatch (CpuFeature->FeatureMask, CpuFeaturesData->CapabilityPcd, CpuFeaturesData->BitMaskSize)) {\r
+        if (IsBitMaskMatch (CpuFeature->FeatureMask, CpuFeaturesData->SettingPcd, CpuFeaturesData->BitMaskSize)) {\r
           DEBUG ((DEBUG_INFO, "[Enable   ] "));\r
         } else {\r
           DEBUG ((DEBUG_INFO, "[Disable  ] "));\r
@@ -485,19 +647,23 @@ AnalysisProcessorFeatures (
       } else {\r
         DEBUG ((DEBUG_INFO, "[Unsupport] "));\r
       }\r
-      DumpCpuFeature (CpuFeature);\r
+      DumpCpuFeature (CpuFeature, CpuFeaturesData->BitMaskSize);\r
       Entry = Entry->ForwardLink;\r
     }\r
-    DEBUG ((DEBUG_INFO, "PcdCpuFeaturesSupport:\n"));\r
-    DumpCpuFeatureMask (CpuFeaturesData->SupportPcds);\r
-    DEBUG ((DEBUG_INFO, "PcdCpuFeaturesUserConfiguration:\n"));\r
-    DumpCpuFeatureMask (CpuFeaturesData->ConfigurationPcds);\r
     DEBUG ((DEBUG_INFO, "PcdCpuFeaturesCapability:\n"));\r
-    DumpCpuFeatureMask (CpuFeaturesData->CapabilityPcds);\r
-    DEBUG ((DEBUG_INFO, "PcdCpuFeaturesSetting:\n"));\r
-    DumpCpuFeatureMask (CpuFeaturesData->SettingPcds);\r
+    DumpCpuFeatureMask (CpuFeaturesData->CapabilityPcd, CpuFeaturesData->BitMaskSize);\r
+    DEBUG ((DEBUG_INFO, "Origin PcdCpuFeaturesSetting:\n"));\r
+    DumpCpuFeatureMask (PcdGetPtr (PcdCpuFeaturesSetting), CpuFeaturesData->BitMaskSize);\r
+    DEBUG ((DEBUG_INFO, "Final PcdCpuFeaturesSetting:\n"));\r
+    DumpCpuFeatureMask (CpuFeaturesData->SettingPcd, CpuFeaturesData->BitMaskSize);\r
   );\r
 \r
+  //\r
+  // Save PCDs and display CPU PCDs\r
+  //\r
+  SetCapabilityPcd (CpuFeaturesData->CapabilityPcd, CpuFeaturesData->BitMaskSize);\r
+  SetSettingPcd (CpuFeaturesData->SettingPcd, CpuFeaturesData->BitMaskSize);\r
+\r
   for (ProcessorNumber = 0; ProcessorNumber < NumberOfCpus; ProcessorNumber++) {\r
     CpuInitOrder = &CpuFeaturesData->InitOrder[ProcessorNumber];\r
     Entry = GetFirstNode (&CpuFeaturesData->FeatureList);\r
@@ -506,7 +672,7 @@ AnalysisProcessorFeatures (
       // Insert each feature into processor's order list\r
       //\r
       CpuFeature = CPU_FEATURE_ENTRY_FROM_LINK (Entry);\r
-      if (IsBitMaskMatch (CpuFeature->FeatureMask, CpuFeaturesData->CapabilityPcds)) {\r
+      if (IsBitMaskMatch (CpuFeature->FeatureMask, CpuFeaturesData->CapabilityPcd, CpuFeaturesData->BitMaskSize)) {\r
         CpuFeatureInOrder = AllocateCopyPool (sizeof (CPU_FEATURES_ENTRY), CpuFeature);\r
         ASSERT (CpuFeatureInOrder != NULL);\r
         InsertTailList (&CpuInitOrder->OrderList, &CpuFeatureInOrder->Link);\r
@@ -520,24 +686,75 @@ AnalysisProcessorFeatures (
     Entry = GetFirstNode (&CpuInitOrder->OrderList);\r
     while (!IsNull (&CpuInitOrder->OrderList, Entry)) {\r
       CpuFeatureInOrder = CPU_FEATURE_ENTRY_FROM_LINK (Entry);\r
-      if (IsBitMaskMatch (CpuFeatureInOrder->FeatureMask, CpuFeaturesData->SettingPcds)) {\r
+\r
+      Success = FALSE;\r
+      if (IsBitMaskMatch (CpuFeatureInOrder->FeatureMask, CpuFeaturesData->SettingPcd, CpuFeaturesData->BitMaskSize)) {\r
         Status = CpuFeatureInOrder->InitializeFunc (ProcessorNumber, CpuInfo, CpuFeatureInOrder->ConfigData, TRUE);\r
         if (EFI_ERROR (Status)) {\r
           //\r
           // Clean the CpuFeatureInOrder->FeatureMask in setting PCD.\r
           //\r
-          SupportedMaskCleanBit (CpuFeaturesData->SettingPcds, CpuFeatureInOrder->FeatureMask);\r
+          SupportedMaskCleanBit (CpuFeaturesData->SettingPcd, CpuFeatureInOrder->FeatureMask, CpuFeaturesData->BitMaskSize);\r
           if (CpuFeatureInOrder->FeatureName != NULL) {\r
-            DEBUG ((DEBUG_WARN, "Warning :: Failed to enable Feature Name = %a.\n", CpuFeatureInOrder->FeatureName));\r
+            DEBUG ((DEBUG_WARN, "Warning :: Failed to enable Feature: Name = %a.\n", CpuFeatureInOrder->FeatureName));\r
           } else {\r
-            DEBUG ((DEBUG_WARN, "Warning :: Failed to enable Feature Mask = "));\r
-            DumpCpuFeatureMask (CpuFeatureInOrder->FeatureMask);\r
+            DEBUG ((DEBUG_WARN, "Warning :: Failed to enable Feature: Mask = "));\r
+            DumpCpuFeatureMask (CpuFeatureInOrder->FeatureMask, CpuFeaturesData->BitMaskSize);\r
           }\r
+        } else {\r
+          Success = TRUE;\r
         }\r
       } else {\r
         Status = CpuFeatureInOrder->InitializeFunc (ProcessorNumber, CpuInfo, CpuFeatureInOrder->ConfigData, FALSE);\r
-        ASSERT_EFI_ERROR (Status);\r
+        if (EFI_ERROR (Status)) {\r
+          if (CpuFeatureInOrder->FeatureName != NULL) {\r
+            DEBUG ((DEBUG_WARN, "Warning :: Failed to disable Feature: Name = %a.\n", CpuFeatureInOrder->FeatureName));\r
+          } else {\r
+            DEBUG ((DEBUG_WARN, "Warning :: Failed to disable Feature: Mask = "));\r
+            DumpCpuFeatureMask (CpuFeatureInOrder->FeatureMask, CpuFeaturesData->BitMaskSize);\r
+          }\r
+        } else {\r
+          Success = TRUE;\r
+        }\r
+      }\r
+\r
+      if (Success) {\r
+        NextEntry = Entry->ForwardLink;\r
+        if (!IsNull (&CpuInitOrder->OrderList, NextEntry)) {\r
+          NextCpuFeatureInOrder = CPU_FEATURE_ENTRY_FROM_LINK (NextEntry);\r
+\r
+          //\r
+          // If the feature has a dependence with the next feature (ONLY core/package dependency matters)\r
+          // and the feature initialized successfully, add a sync semaphore here.\r
+          //\r
+          BeforeDep = DetectFeatureScope (CpuFeatureInOrder, TRUE, NextCpuFeatureInOrder->FeatureMask);\r
+          AfterDep  = DetectFeatureScope (NextCpuFeatureInOrder, FALSE, CpuFeatureInOrder->FeatureMask);\r
+          //\r
+          // Check whether next feature has After type dependence with not neighborhood CPU\r
+          // Features in former CPU features.\r
+          //\r
+          NoneNeibAfterDep = DetectNoneNeighborhoodFeatureScope(NextCpuFeatureInOrder, FALSE, &CpuInitOrder->OrderList);\r
+        } else {\r
+          BeforeDep        = NoneDepType;\r
+          AfterDep         = NoneDepType;\r
+          NoneNeibAfterDep = NoneDepType;\r
+        }\r
+        //\r
+        // Check whether current feature has Before type dependence with none neighborhood\r
+        // CPU features in after Cpu features.\r
+        //\r
+        NoneNeibBeforeDep = DetectNoneNeighborhoodFeatureScope(CpuFeatureInOrder, TRUE, &CpuInitOrder->OrderList);\r
+\r
+        //\r
+        // Get the biggest dependence and add semaphore for it.\r
+        // PackageDepType > CoreDepType > ThreadDepType > NoneDepType.\r
+        //\r
+        BeforeDep = BiggestDep(BeforeDep, AfterDep, NoneNeibBeforeDep, NoneNeibAfterDep);\r
+        if (BeforeDep > ThreadDepType) {\r
+          CPU_REGISTER_TABLE_WRITE32 (ProcessorNumber, Semaphore, 0, BeforeDep);\r
+        }\r
       }\r
+\r
       Entry = Entry->ForwardLink;\r
     }\r
 \r
@@ -546,7 +763,7 @@ AnalysisProcessorFeatures (
     // again during initialize the features.\r
     //\r
     DEBUG ((DEBUG_INFO, "Dump final value for PcdCpuFeaturesSetting:\n"));\r
-    DumpCpuFeatureMask (CpuFeaturesData->SettingPcds);\r
+    DumpCpuFeatureMask (CpuFeaturesData->SettingPcd, CpuFeaturesData->BitMaskSize);\r
 \r
     //\r
     // Dump the RegisterTable\r
@@ -555,27 +772,130 @@ AnalysisProcessorFeatures (
   }\r
 }\r
 \r
+/**\r
+  Increment the semaphore by 1 using InterlockedIncrement().\r
+\r
+  @param[in,out]  Sem    Pointer to the 32-bit semaphore counter to increment.\r
+\r
+**/\r
+VOID\r
+LibReleaseSemaphore (\r
+  IN OUT  volatile UINT32           *Sem\r
+  )\r
+{\r
+  InterlockedIncrement (Sem);\r
+}\r
+\r
+/**\r
+  Wait until the semaphore is non-zero, then decrement it by 1.\r
+\r
+  Spins re-reading the semaphore while it is zero. Once a non-zero value is\r
+  observed, InterlockedCompareExchange32() is used to store Value - 1; if the\r
+  exchange does not return the observed value, the loop reads and tries again.\r
+\r
+  @param[in,out]  Sem    Pointer to the 32-bit semaphore counter to wait on.\r
+\r
+**/\r
+VOID\r
+LibWaitForSemaphore (\r
+  IN OUT  volatile UINT32           *Sem\r
+  )\r
+{\r
+  UINT32  Value;\r
+\r
+  do {\r
+    Value = *Sem;\r
+  } while (Value == 0 ||\r
+           InterlockedCompareExchange32 (\r
+             Sem,\r
+             Value,\r
+             Value - 1\r
+             ) != Value);\r
+}\r
+\r
+/**\r
+  Read or write control register CR0, CR2, CR3 or CR4.\r
+\r
+  @param[in]      CrIndex         Index of the CR to access; only 0, 2, 3 and 4 are handled.\r
+  @param[in]      Read            TRUE to read the CR into *CrValue, FALSE to write *CrValue to it.\r
+  @param[in,out]  CrValue         Receives the CR value on read; supplies the value on write.\r
+\r
+  @retval    EFI_SUCCESS on success, EFI_UNSUPPORTED for any other CrIndex.\r
+**/\r
+UINTN\r
+ReadWriteCr (\r
+  IN     UINT32       CrIndex,\r
+  IN     BOOLEAN      Read,\r
+  IN OUT UINTN        *CrValue\r
+  )\r
+{\r
+  switch (CrIndex) {\r
+  case 0:\r
+    if (Read) {\r
+      *CrValue = AsmReadCr0 ();\r
+    } else {\r
+      AsmWriteCr0 (*CrValue);\r
+    }\r
+    break;\r
+  case 2:\r
+    if (Read) {\r
+      *CrValue = AsmReadCr2 ();\r
+    } else {\r
+      AsmWriteCr2 (*CrValue);\r
+    }\r
+    break;\r
+  case 3:\r
+    if (Read) {\r
+      *CrValue = AsmReadCr3 ();\r
+    } else {\r
+      AsmWriteCr3 (*CrValue);\r
+    }\r
+    break;\r
+  case 4:\r
+    if (Read) {\r
+      *CrValue = AsmReadCr4 ();\r
+    } else {\r
+      AsmWriteCr4 (*CrValue);\r
+    }\r
+    break;\r
+  default:\r
+    return EFI_UNSUPPORTED;\r
+  }\r
+\r
+  return EFI_SUCCESS;\r
+}\r
+\r
 /**\r
   Initialize the CPU registers from a register table.\r
 \r
-  @param[in]  ProcessorNumber  The index of the CPU executing this function.\r
+  @param[in]  RegisterTable         The register table for this AP.\r
+  @param[in]  ApLocation            AP location info for this AP.\r
+  @param[in]  CpuStatus             CPU status info for this CPU.\r
+  @param[in]  CpuFlags              Flags data structure used when program the register.\r
 \r
   @note This service could be called by BSP/APs.\r
 **/\r
 VOID\r
 ProgramProcessorRegister (\r
-  IN UINTN  ProcessorNumber\r
+  IN CPU_REGISTER_TABLE           *RegisterTable,\r
+  IN EFI_CPU_PHYSICAL_LOCATION    *ApLocation,\r
+  IN CPU_STATUS_INFORMATION       *CpuStatus,\r
+  IN PROGRAM_CPU_REGISTER_FLAGS   *CpuFlags\r
   )\r
 {\r
-  CPU_FEATURES_DATA         *CpuFeaturesData;\r
-  CPU_REGISTER_TABLE        *RegisterTable;\r
   CPU_REGISTER_TABLE_ENTRY  *RegisterTableEntry;\r
   UINTN                     Index;\r
   UINTN                     Value;\r
   CPU_REGISTER_TABLE_ENTRY  *RegisterTableEntryHead;\r
-\r
-  CpuFeaturesData = GetCpuFeaturesData ();\r
-  RegisterTable = &CpuFeaturesData->RegisterTable[ProcessorNumber];\r
+  volatile UINT32           *SemaphorePtr;\r
+  UINT32                    FirstThread;\r
+  UINT32                    CurrentThread;\r
+  UINT32                    CurrentCore;\r
+  UINTN                     ProcessorIndex;\r
+  UINT32                    *ThreadCountPerPackage;\r
+  UINT8                     *ThreadCountPerCore;\r
+  EFI_STATUS                Status;\r
+  UINT64                    CurrentValue;\r
 \r
   //\r
   // Traverse Register Table of this logical processor\r
@@ -594,64 +914,51 @@ ProgramProcessorRegister (
     // The specified register is Control Register\r
     //\r
     case ControlRegister:\r
-      switch (RegisterTableEntry->Index) {\r
-      case 0:\r
-        Value = AsmReadCr0 ();\r
-        Value = (UINTN) BitFieldWrite64 (\r
-                          Value,\r
-                          RegisterTableEntry->ValidBitStart,\r
-                          RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1,\r
-                          RegisterTableEntry->Value\r
-                          );\r
-        AsmWriteCr0 (Value);\r
-        break;\r
-      case 2:\r
-        Value = AsmReadCr2 ();\r
-        Value = (UINTN) BitFieldWrite64 (\r
-                          Value,\r
-                          RegisterTableEntry->ValidBitStart,\r
-                          RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1,\r
-                          RegisterTableEntry->Value\r
-                          );\r
-        AsmWriteCr2 (Value);\r
-        break;\r
-      case 3:\r
-        Value = AsmReadCr3 ();\r
-        Value = (UINTN) BitFieldWrite64 (\r
-                          Value,\r
-                          RegisterTableEntry->ValidBitStart,\r
-                          RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1,\r
-                          RegisterTableEntry->Value\r
-                          );\r
-        AsmWriteCr3 (Value);\r
-        break;\r
-      case 4:\r
-        Value = AsmReadCr4 ();\r
-        Value = (UINTN) BitFieldWrite64 (\r
-                          Value,\r
-                          RegisterTableEntry->ValidBitStart,\r
-                          RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1,\r
-                          RegisterTableEntry->Value\r
-                          );\r
-        AsmWriteCr4 (Value);\r
-        break;\r
-      case 8:\r
-        //\r
-        //  Do we need to support CR8?\r
-        //\r
-        break;\r
-      default:\r
+      Status = ReadWriteCr (RegisterTableEntry->Index, TRUE, &Value);\r
+      if (EFI_ERROR (Status)) {\r
         break;\r
       }\r
+      if (RegisterTableEntry->TestThenWrite) {\r
+        CurrentValue = BitFieldRead64 (\r
+                         Value,\r
+                         RegisterTableEntry->ValidBitStart,\r
+                         RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1\r
+                         );\r
+        if (CurrentValue == RegisterTableEntry->Value) {\r
+          break;\r
+        }\r
+      }\r
+      Value = (UINTN) BitFieldWrite64 (\r
+                        Value,\r
+                        RegisterTableEntry->ValidBitStart,\r
+                        RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1,\r
+                        RegisterTableEntry->Value\r
+                        );\r
+      ReadWriteCr (RegisterTableEntry->Index, FALSE, &Value);\r
       break;\r
+\r
     //\r
     // The specified register is Model Specific Register\r
     //\r
     case Msr:\r
-      //\r
-      // Get lock to avoid Package/Core scope MSRs programming issue in parallel execution mode\r
-      //\r
-      AcquireSpinLock (&CpuFeaturesData->MsrLock);\r
+      if (RegisterTableEntry->TestThenWrite) {\r
+        Value = (UINTN)AsmReadMsr64 (RegisterTableEntry->Index);\r
+        if (RegisterTableEntry->ValidBitLength >= 64) {\r
+          if (Value == RegisterTableEntry->Value) {\r
+            break;\r
+          }\r
+        } else {\r
+          CurrentValue = BitFieldRead64 (\r
+                           Value,\r
+                           RegisterTableEntry->ValidBitStart,\r
+                           RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1\r
+                           );\r
+          if (CurrentValue == RegisterTableEntry->Value) {\r
+            break;\r
+          }\r
+        }\r
+      }\r
+\r
       if (RegisterTableEntry->ValidBitLength >= 64) {\r
         //\r
         // If length is not less than 64 bits, then directly write without reading\r
@@ -671,20 +978,19 @@ ProgramProcessorRegister (
           RegisterTableEntry->Value\r
           );\r
       }\r
-      ReleaseSpinLock (&CpuFeaturesData->MsrLock);\r
       break;\r
     //\r
     // MemoryMapped operations\r
     //\r
     case MemoryMapped:\r
-      AcquireSpinLock (&CpuFeaturesData->MemoryMappedLock);\r
+      AcquireSpinLock (&CpuFlags->MemoryMappedLock);\r
       MmioBitFieldWrite32 (\r
         (UINTN)(RegisterTableEntry->Index | LShiftU64 (RegisterTableEntry->HighIndex, 32)),\r
         RegisterTableEntry->ValidBitStart,\r
         RegisterTableEntry->ValidBitStart + RegisterTableEntry->ValidBitLength - 1,\r
         (UINT32)RegisterTableEntry->Value\r
         );\r
-      ReleaseSpinLock (&CpuFeaturesData->MemoryMappedLock);\r
+      ReleaseSpinLock (&CpuFlags->MemoryMappedLock);\r
       break;\r
     //\r
     // Enable or disable cache\r
@@ -700,6 +1006,102 @@ ProgramProcessorRegister (
       }\r
       break;\r
 \r
+    case Semaphore:\r
+      // The semaphore logic works as below:\r
+      //\r
+      //  V(x) = LibReleaseSemaphore (Semaphore[FirstThread + x]);\r
+      //  P(x) = LibWaitForSemaphore (Semaphore[FirstThread + x]);\r
+      //\r
+      //  All threads (T0...Tn) wait at the P() line and then continue running\r
+      //  together.\r
+      //\r
+      //\r
+      //  T0             T1            ...           Tn\r
+      //\r
+      //  V(0...n)       V(0...n)      ...           V(0...n)\r
+      //  n * P(0)       n * P(1)      ...           n * P(n)\r
+      //\r
+      switch (RegisterTableEntry->Value) {\r
+      case CoreDepType:\r
+        SemaphorePtr = CpuFlags->CoreSemaphoreCount;\r
+        ThreadCountPerCore = (UINT8 *)(UINTN)CpuStatus->ThreadCountPerCore;\r
+\r
+        CurrentCore = ApLocation->Package * CpuStatus->MaxCoreCount + ApLocation->Core;\r
+        //\r
+        // Get Offset info for the first thread in the core which current thread belongs to.\r
+        //\r
+        FirstThread   = CurrentCore * CpuStatus->MaxThreadCount;\r
+        CurrentThread = FirstThread + ApLocation->Thread;\r
+\r
+        //\r
+        // Different cores may have different numbers of valid threads. If the driver tracked the\r
+        // exact thread indexes in each core, the logic would be much more complicated.\r
+        // Instead, the driver simply records the max thread number among all cores and uses it as\r
+        // the expected thread number for every core.\r
+        // In the two-step logic below, the current thread first releases the semaphore of each\r
+        // thread in the current core. Some of those threads may not be valid in this core, but the\r
+        // driver doesn't care. Second, the current thread waits on its own semaphore once for each\r
+        // valid thread in the current core. Because only valid threads release the semaphore of\r
+        // this thread, the driver only needs to wait for the valid thread count.\r
+        //\r
+\r
+        //\r
+        // First Notify ALL THREADs in current Core that this thread is ready.\r
+        //\r
+        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount; ProcessorIndex ++) {\r
+          LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);\r
+        }\r
+        //\r
+        // Second, check whether all VALID THREADs (not all threads) in current core are ready.\r
+        //\r
+        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerCore[CurrentCore]; ProcessorIndex ++) {\r
+          LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);\r
+        }\r
+        break;\r
+\r
+      case PackageDepType:\r
+        SemaphorePtr = CpuFlags->PackageSemaphoreCount;\r
+        ThreadCountPerPackage = (UINT32 *)(UINTN)CpuStatus->ThreadCountPerPackage;\r
+        //\r
+        // Get Offset info for the first thread in the package which current thread belongs to.\r
+        //\r
+        FirstThread = ApLocation->Package * CpuStatus->MaxCoreCount * CpuStatus->MaxThreadCount;\r
+        //\r
+        // Get the offset of the current thread in the package semaphore array.\r
+        //\r
+        CurrentThread = FirstThread + CpuStatus->MaxThreadCount * ApLocation->Core + ApLocation->Thread;\r
+\r
+        //\r
+        // Different packages may have different numbers of valid threads. If the driver tracked the\r
+        // exact thread indexes in each package, the logic would be much more complicated.\r
+        // Instead, the driver simply records the max thread number among all packages and uses it as\r
+        // the expected thread number for every package.\r
+        // In the two-step logic below, the current thread first releases the semaphore of each\r
+        // thread in the current package. Some of those threads may not be valid in this package, but\r
+        // the driver doesn't care. Second, the current thread waits on its own semaphore once for each\r
+        // valid thread in the current package. Because only valid threads release the semaphore of\r
+        // this thread, the driver only needs to wait for the valid thread count.\r
+        //\r
+\r
+        //\r
+        // First Notify ALL THREADS in current package that this thread is ready.\r
+        //\r
+        for (ProcessorIndex = 0; ProcessorIndex < CpuStatus->MaxThreadCount * CpuStatus->MaxCoreCount; ProcessorIndex ++) {\r
+          LibReleaseSemaphore (&SemaphorePtr[FirstThread + ProcessorIndex]);\r
+        }\r
+        //\r
+        // Second, check whether VALID THREADS (not all threads) in current package are ready.\r
+        //\r
+        for (ProcessorIndex = 0; ProcessorIndex < ThreadCountPerPackage[ApLocation->Package]; ProcessorIndex ++) {\r
+          LibWaitForSemaphore (&SemaphorePtr[CurrentThread]);\r
+        }\r
+        break;\r
+\r
+      default:\r
+        break;\r
+      }\r
+      break;\r
+\r
     default:\r
       break;\r
     }\r
@@ -718,10 +1120,37 @@ SetProcessorRegister (
   IN OUT VOID            *Buffer\r
   )\r
 {\r
-  UINTN                  ProcessorNumber;\r
+  CPU_FEATURES_DATA         *CpuFeaturesData;\r
+  CPU_REGISTER_TABLE        *RegisterTable;\r
+  CPU_REGISTER_TABLE        *RegisterTables;\r
+  UINT32                    InitApicId;\r
+  UINTN                     ProcIndex;\r
+  UINTN                     Index;\r
+  ACPI_CPU_DATA             *AcpiCpuData;\r
+\r
+  CpuFeaturesData = (CPU_FEATURES_DATA *) Buffer;\r
+  AcpiCpuData = CpuFeaturesData->AcpiCpuData;\r
 \r
-  ProcessorNumber = GetProcessorIndex ();\r
-  ProgramProcessorRegister (ProcessorNumber);\r
+  RegisterTables = (CPU_REGISTER_TABLE *)(UINTN)AcpiCpuData->CpuFeatureInitData.RegisterTable;\r
+\r
+  InitApicId = GetInitialApicId ();\r
+  RegisterTable = NULL;\r
+  ProcIndex = (UINTN)-1;\r
+  for (Index = 0; Index < AcpiCpuData->NumberOfCpus; Index++) {\r
+    if (RegisterTables[Index].InitialApicId == InitApicId) {\r
+      RegisterTable =  &RegisterTables[Index];\r
+      ProcIndex = Index;\r
+      break;\r
+    }\r
+  }\r
+  ASSERT (RegisterTable != NULL);\r
+\r
+  ProgramProcessorRegister (\r
+    RegisterTable,\r
+    (EFI_CPU_PHYSICAL_LOCATION *)(UINTN)AcpiCpuData->CpuFeatureInitData.ApLocation + ProcIndex,\r
+    &AcpiCpuData->CpuFeatureInitData.CpuStatus,\r
+    &CpuFeaturesData->CpuFlags\r
+    );\r
 }\r
 \r
 /**\r
@@ -738,59 +1167,24 @@ CpuFeaturesDetect (
   VOID\r
   )\r
 {\r
-  UINTN                  NumberOfCpus;\r
-  UINTN                  NumberOfEnabledProcessors;\r
+  CPU_FEATURES_DATA      *CpuFeaturesData;\r
 \r
-  GetNumberOfProcessor (&NumberOfCpus, &NumberOfEnabledProcessors);\r
+  CpuFeaturesData = GetCpuFeaturesData();\r
 \r
-  CpuInitDataInitialize (NumberOfCpus);\r
+  CpuInitDataInitialize ();\r
 \r
-  //\r
-  // Wakeup all APs for data collection.\r
-  //\r
-  StartupAPsWorker (CollectProcessorData);\r
+  if (CpuFeaturesData->NumberOfCpus > 1) {\r
+    //\r
+    // Wakeup all APs for data collection.\r
+    //\r
+    StartupAllAPsWorker (CollectProcessorData, NULL);\r
+  }\r
 \r
   //\r
   // Collect data on BSP\r
   //\r
-  CollectProcessorData (NULL);\r
+  CollectProcessorData (CpuFeaturesData);\r
 \r
-  AnalysisProcessorFeatures (NumberOfCpus);\r
+  AnalysisProcessorFeatures (CpuFeaturesData->NumberOfCpus);\r
 }\r
 \r
-/**\r
-  Performs CPU features Initialization.\r
-\r
-  This service will invoke MP service to perform CPU features\r
-  initialization on BSP/APs per user configuration.\r
-\r
-  @note This service could be called by BSP only.\r
-**/\r
-VOID\r
-EFIAPI\r
-CpuFeaturesInitialize (\r
-  VOID\r
-  )\r
-{\r
-  CPU_FEATURES_DATA      *CpuFeaturesData;\r
-  UINTN                  OldBspNumber;\r
-\r
-  CpuFeaturesData = GetCpuFeaturesData ();\r
-\r
-  OldBspNumber = GetProcessorIndex();\r
-  CpuFeaturesData->BspNumber = OldBspNumber;\r
-  //\r
-  // Wakeup all APs for programming.\r
-  //\r
-  StartupAPsWorker (SetProcessorRegister);\r
-  //\r
-  // Programming BSP\r
-  //\r
-  SetProcessorRegister (NULL);\r
-  //\r
-  // Switch to new BSP if required\r
-  //\r
-  if (CpuFeaturesData->BspNumber != OldBspNumber) {\r
-    SwitchNewBsp (CpuFeaturesData->BspNumber);\r
-  }\r
-}\r