Botan  2.19.1
Crypto and TLS for C++11
cpuid_x86.cpp
Go to the documentation of this file.
1 /*
2 * Runtime CPU detection for x86
3 * (C) 2009,2010,2013,2017 Jack Lloyd
4 *
5 * Botan is released under the Simplified BSD License (see license.txt)
6 */
7 
8 #include <botan/cpuid.h>
9 #include <botan/mem_ops.h>
10 #include <botan/loadstor.h>
11 
12 #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
13 
14 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
15  #include <intrin.h>
16 #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
17  #include <ia32intrin.h>
18 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
19  #include <cpuid.h>
20 #endif
21 
22 #endif
23 
24 namespace Botan {
25 
26 #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
27 
28 uint64_t CPUID::CPUID_Data::detect_cpu_features(size_t* cache_line_size)
29  {
30 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
31  #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0)
32  #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
33 
34 #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
35  #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0)
36  #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
37 
38 #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
39  #define X86_CPUID(type, out) \
40  asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
41  : "0" (type))
42 
43  #define X86_CPUID_SUBLEVEL(type, level, out) \
44  asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
45  : "0" (type), "2" (level))
46 
47 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
48  #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0)
49 
50  #define X86_CPUID_SUBLEVEL(type, level, out) \
51  do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0)
52 #else
53  #warning "No way of calling x86 cpuid instruction for this compiler"
54  #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0)
55  #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0)
56 #endif
57 
58  uint64_t features_detected = 0;
59  uint32_t cpuid[4] = { 0 };
60  bool has_avx = 0;
61 
62  // CPUID 0: vendor identification, max sublevel
63  X86_CPUID(0, cpuid);
64 
65  const uint32_t max_supported_sublevel = cpuid[0];
66 
67  const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
68  const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
69  const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
70  const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
71 
72  if(max_supported_sublevel >= 1)
73  {
74  // CPUID 1: feature bits
75  X86_CPUID(1, cpuid);
76  const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
77 
78  enum x86_CPUID_1_bits : uint64_t {
79  RDTSC = (1ULL << 4),
80  SSE2 = (1ULL << 26),
81  CLMUL = (1ULL << 33),
82  SSSE3 = (1ULL << 41),
83  SSE41 = (1ULL << 51),
84  SSE42 = (1ULL << 52),
85  AESNI = (1ULL << 57),
86  OSXSAVE = (1ULL << 59),
87  AVX = (1ULL << 60),
88  RDRAND = (1ULL << 62)
89  };
90 
91  if(flags0 & x86_CPUID_1_bits::RDTSC)
92  features_detected |= CPUID::CPUID_RDTSC_BIT;
93  if(flags0 & x86_CPUID_1_bits::SSE2)
94  features_detected |= CPUID::CPUID_SSE2_BIT;
95  if(flags0 & x86_CPUID_1_bits::CLMUL)
96  features_detected |= CPUID::CPUID_CLMUL_BIT;
97  if(flags0 & x86_CPUID_1_bits::SSSE3)
98  features_detected |= CPUID::CPUID_SSSE3_BIT;
99  if(flags0 & x86_CPUID_1_bits::SSE41)
100  features_detected |= CPUID::CPUID_SSE41_BIT;
101  if(flags0 & x86_CPUID_1_bits::SSE42)
102  features_detected |= CPUID::CPUID_SSE42_BIT;
103  if(flags0 & x86_CPUID_1_bits::AESNI)
104  features_detected |= CPUID::CPUID_AESNI_BIT;
105  if(flags0 & x86_CPUID_1_bits::RDRAND)
106  features_detected |= CPUID::CPUID_RDRAND_BIT;
107  if((flags0 & x86_CPUID_1_bits::AVX) &&
108  (flags0 & x86_CPUID_1_bits::OSXSAVE))
109  has_avx = 1;
110  }
111 
112  if(is_intel)
113  {
114  // Intel cache line size is in cpuid(1) output
115  *cache_line_size = 8 * get_byte(2, cpuid[1]);
116  }
117  else if(is_amd)
118  {
119  // AMD puts it in vendor zone
120  X86_CPUID(0x80000005, cpuid);
121  *cache_line_size = get_byte(3, cpuid[2]);
122  }
123 
124  if(max_supported_sublevel >= 7)
125  {
126  clear_mem(cpuid, 4);
127  X86_CPUID_SUBLEVEL(7, 0, cpuid);
128 
129  enum x86_CPUID_7_bits : uint64_t {
130  BMI1 = (1ULL << 3),
131  AVX2 = (1ULL << 5),
132  BMI2 = (1ULL << 8),
133  AVX512_F = (1ULL << 16),
134  AVX512_DQ = (1ULL << 17),
135  RDSEED = (1ULL << 18),
136  ADX = (1ULL << 19),
137  AVX512_IFMA = (1ULL << 21),
138  SHA = (1ULL << 29),
139  AVX512_BW = (1ULL << 30),
140  AVX512_VL = (1ULL << 31),
141  AVX512_VBMI = (1ULL << 33),
142  AVX512_VBMI2 = (1ULL << 38),
143  AVX512_VAES = (1ULL << 41),
144  AVX512_VCLMUL = (1ULL << 42),
145  AVX512_VBITALG = (1ULL << 44),
146  };
147 
148  const uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
149 
150  if((flags7 & x86_CPUID_7_bits::AVX2) && has_avx)
151  features_detected |= CPUID::CPUID_AVX2_BIT;
152  if(flags7 & x86_CPUID_7_bits::BMI1)
153  {
154  features_detected |= CPUID::CPUID_BMI1_BIT;
155  /*
156  We only set the BMI2 bit if BMI1 is also supported, so BMI2
157  code can safely use both extensions. No known processor
158  implements BMI2 but not BMI1.
159  */
160  if(flags7 & x86_CPUID_7_bits::BMI2)
161  features_detected |= CPUID::CPUID_BMI2_BIT;
162  }
163 
164  if((flags7 & x86_CPUID_7_bits::AVX512_F) && has_avx)
165  {
166  features_detected |= CPUID::CPUID_AVX512F_BIT;
167 
168  if(flags7 & x86_CPUID_7_bits::AVX512_DQ)
169  features_detected |= CPUID::CPUID_AVX512DQ_BIT;
170  if(flags7 & x86_CPUID_7_bits::AVX512_BW)
171  features_detected |= CPUID::CPUID_AVX512BW_BIT;
172 
173  const uint64_t ICELAKE_FLAGS =
174  x86_CPUID_7_bits::AVX512_F |
175  x86_CPUID_7_bits::AVX512_DQ |
176  x86_CPUID_7_bits::AVX512_IFMA |
177  x86_CPUID_7_bits::AVX512_BW |
178  x86_CPUID_7_bits::AVX512_VL |
179  x86_CPUID_7_bits::AVX512_VBMI |
180  x86_CPUID_7_bits::AVX512_VBMI2 |
181  x86_CPUID_7_bits::AVX512_VBITALG;
182 
183  if((flags7 & ICELAKE_FLAGS) == ICELAKE_FLAGS)
184  features_detected |= CPUID::CPUID_AVX512_ICL_BIT;
185 
186  if(flags7 & x86_CPUID_7_bits::AVX512_VAES)
187  features_detected |= CPUID::CPUID_AVX512_AES_BIT;
188  if(flags7 & x86_CPUID_7_bits::AVX512_VCLMUL)
189  features_detected |= CPUID::CPUID_AVX512_CLMUL_BIT;
190  }
191 
192  if(flags7 & x86_CPUID_7_bits::RDSEED)
193  features_detected |= CPUID::CPUID_RDSEED_BIT;
194  if(flags7 & x86_CPUID_7_bits::ADX)
195  features_detected |= CPUID::CPUID_ADX_BIT;
196  if(flags7 & x86_CPUID_7_bits::SHA)
197  features_detected |= CPUID::CPUID_SHA_BIT;
198  }
199 
200 #undef X86_CPUID
201 #undef X86_CPUID_SUBLEVEL
202 
203  /*
204  * If we don't have access to CPUID, we can still safely assume that
205  * any x86-64 processor has SSE2 and RDTSC
206  */
207 #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
208  if(features_detected == 0)
209  {
210  features_detected |= CPUID::CPUID_SSE2_BIT;
211  features_detected |= CPUID::CPUID_RDTSC_BIT;
212  }
213 #endif
214 
215  return features_detected;
216  }
217 
218 #endif
219 
220 }
bool same_mem(const T *p1, const T *p2, size_t n)
Definition: mem_ops.h:217
void clear_mem(T *ptr, size_t n)
Definition: mem_ops.h:115
constexpr uint8_t get_byte(size_t byte_num, T input)
Definition: loadstor.h:41
Definition: alg_id.cpp:13