Botan  2.1.0
Crypto and TLS for C++11
cpuid.cpp
Go to the documentation of this file.
1 /*
2 * Runtime CPU detection
3 * (C) 2009,2010,2013,2017 Jack Lloyd
4 *
5 * Botan is released under the Simplified BSD License (see license.txt)
6 */
7 
8 #include <botan/cpuid.h>
9 #include <botan/types.h>
10 #include <botan/loadstor.h>
11 #include <botan/exceptn.h>
12 #include <botan/mem_ops.h>
13 #include <botan/parsing.h>
14 #include <ostream>
15 
16 #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
17 
18 /*
19 * On Darwin and OpenBSD ppc, use sysctl to detect AltiVec
20 */
21 #if defined(BOTAN_TARGET_OS_IS_DARWIN)
22  #include <sys/sysctl.h>
23 #elif defined(BOTAN_TARGET_OS_IS_OPENBSD)
24  #include <sys/param.h>
25  #include <sys/sysctl.h>
26  #include <machine/cpu.h>
27 #endif
28 
29 #elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
30 
31 /*
32 * On ARM, use getauxval if available, otherwise fall back to
33 * running probe functions with a SIGILL handler.
34 */
35 #if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL)
36  #include <sys/auxv.h>
37 #else
38  #include <botan/internal/os_utils.h>
39 #endif
40 
41 #elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
42 
43 /*
44 * On x86, use CPUID instruction
45 */
46 
47 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
48  #include <intrin.h>
49 #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
50  #include <ia32intrin.h>
51 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
52  #include <cpuid.h>
53 #endif
54 
55 #endif
56 
57 namespace Botan {
58 
// Feature mask for the running processor. Zero until the initialization
// routine further down in this file fills it in and ORs in
// CPUID_INITIALIZED_BIT.
59 uint64_t CPUID::g_processor_features = 0;
// Cache line size. Starts at the build-time default; a pointer to it is
// handed to the ARM and x86 detection routines, which may overwrite it.
60 size_t CPUID::g_cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE;
// Result of the runtime endian probe performed during initialization.
61 bool CPUID::g_little_endian = false;
62 
63 namespace {
64 
65 #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
66 
/*
* PowerPC feature detection. AltiVec is the only feature probed, using
* sysctl on Darwin and OpenBSD, or the processor version register (PVR)
* on Linux and NetBSD.
*
* Returns CPUID::CPUID_ALTIVEC_BIT when AltiVec is detected, otherwise 0.
*
* The CPUID_*_BIT constants are OR'ed directly into the feature word by
* the rest of this file, i.e. they are ready-made masks. The AltiVec
* constant is therefore returned as-is rather than used as a shift count.
*/
uint64_t powerpc_detect_cpu_featutures()
   {
#if defined(BOTAN_TARGET_OS_IS_DARWIN) || defined(BOTAN_TARGET_OS_IS_OPENBSD)
   // On Darwin/OS X and OpenBSD, use sysctl

#if defined(BOTAN_TARGET_OS_IS_OPENBSD)
   int sels[2] = { CTL_MACHDEP, CPU_ALTIVEC };
#else
   // From Apple's docs
   int sels[2] = { CTL_HW, HW_VECTORUNIT };
#endif
   int vector_type = 0;
   size_t length = sizeof(vector_type);
   const int error = sysctl(sels, 2, &vector_type, &length, nullptr, 0);

   if(error == 0 && vector_type > 0)
      return CPUID::CPUID_ALTIVEC_BIT;

#elif defined(BOTAN_TARGET_OS_IS_LINUX) || defined(BOTAN_TARGET_OS_IS_NETBSD)
   /*
   On PowerPC, SPR 287 is PVR, the processor version register.
   Normally it is only accessible to ring 0, but Linux and NetBSD
   (others, too, maybe?) will trap and emulate it for us.

   PVR identifiers for various AltiVec enabled CPUs. Taken from
   PearPC and Linux sources, mostly.
   */

   uint32_t pvr = 0;

   // TODO: we could run inside SIGILL handler block
   asm volatile("mfspr %0, 287" : "=r" (pvr));

   // Top 16 bits suffice to identify model
   pvr >>= 16;

   const uint16_t altivec_models[] = {
      0x000C, // G4 7400
      0x0039, // G5 970
      0x003C, // G5 970FX
      0x0044, // G5 970MP
      0x0045, // G5 970GX
      0x003E, // POWER6
      0x003F, // POWER7
      0x004B, // POWER8
      0x0070  // Cell PPU
   };

   for(const uint16_t model : altivec_models)
      {
      if(pvr == model)
         return CPUID::CPUID_ALTIVEC_BIT;
      }
#else
   #warning "No PowerPC feature detection available for this platform"
#endif

   return 0;
   }
131 
132 #elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
133 
134 uint64_t arm_detect_cpu_features(size_t* cache_line_size)
135  {
136  uint64_t detected_features = 0;
137  *cache_line_size = BOTAN_TARGET_CPU_DEFAULT_CACHE_LINE_SIZE;
138 
139 #if defined(BOTAN_TARGET_OS_HAS_GETAUXVAL)
140  errno = 0;
141 
142  /*
143  * On systems with getauxval these bits should normally be defined
144  * in bits/auxv.h but some buggy? glibc installs seem to miss them.
145  * These following values are all fixed, for the Linux ELF format,
146  * so we just hardcode them in ARM_hwcap_bit enum.
147  */
148 
149  enum ARM_hwcap_bit {
150 #if defined(BOTAN_TARGET_ARCH_IS_ARM32)
151  NEON_bit = (1 << 12),
152  AES_bit = (1 << 0),
153  PMULL_bit = (1 << 1),
154  SHA1_bit = (1 << 2),
155  SHA2_bit = (1 << 3),
156 
157  ARCH_hwcap_neon = 16, // AT_HWCAP
158  ARCH_hwcap_crypto = 26, // AT_HWCAP2
159 #elif defined(BOTAN_TARGET_ARCH_IS_ARM64)
160  NEON_bit = (1 << 1),
161  AES_bit = (1 << 3),
162  PMULL_bit = (1 << 4),
163  SHA1_bit = (1 << 5),
164  SHA2_bit = (1 << 6),
165 
166  ARCH_hwcap_neon = 16, // AT_HWCAP
167  ARCH_hwcap_crypto = 16, // AT_HWCAP
168 #endif
169  };
170 
171  const unsigned long hwcap_neon = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_neon);
172  if(hwcap_neon & ARM_hwcap_bit::NEON_bit)
173  detected_features |= CPUID::CPUID_ARM_NEON_BIT;
174 
175  /*
176  On aarch64 this ends up calling getauxval twice with AT_HWCAP
177  It doesn't seem worth optimizing this out, since getauxval is
178  just reading a field in the ELF header.
179  */
180  const unsigned long hwcap_crypto = ::getauxval(ARM_hwcap_bit::ARCH_hwcap_crypto);
181  if(hwcap_crypto & ARM_hwcap_bit::AES_bit)
182  detected_features |= CPUID::CPUID_ARM_AES_BIT;
183  if(hwcap_crypto & ARM_hwcap_bit::PMULL_bit)
184  detected_features |= CPUID::CPUID_ARM_PMULL_BIT;
185  if(hwcap_crypto & ARM_hwcap_bit::SHA1_bit)
186  detected_features |= CPUID::CPUID_ARM_SHA1_BIT;
187  if(hwcap_crypto & ARM_hwcap_bit::SHA2_bit)
188  detected_features |= CPUID::CPUID_ARM_SHA2_BIT;
189 
190 #if defined(AT_DCACHEBSIZE)
191  const unsigned long dcache_line = ::getauxval(AT_DCACHEBSIZE);
192 
193  // plausibility check
194  if(dcache_line == 32 || dcache_line == 64 || dcache_line == 128)
195  *cache_line_size = static_cast<size_t>(dcache_line);
196 #endif
197 
198 #else
199  // No getauxval API available, fall back on probe functions
200 
201  // TODO: probe functions
202 
203 #endif
204 
205  return detected_features;
206  }
207 
208 #elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
209 
210 uint64_t x86_detect_cpu_features(size_t* cache_line_size)
211  {
212 #if defined(BOTAN_BUILD_COMPILER_IS_MSVC)
213  #define X86_CPUID(type, out) do { __cpuid((int*)out, type); } while(0)
214  #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
215 
216 #elif defined(BOTAN_BUILD_COMPILER_IS_INTEL)
217  #define X86_CPUID(type, out) do { __cpuid(out, type); } while(0)
218  #define X86_CPUID_SUBLEVEL(type, level, out) do { __cpuidex((int*)out, type, level); } while(0)
219 
220 #elif defined(BOTAN_TARGET_ARCH_IS_X86_64) && defined(BOTAN_USE_GCC_INLINE_ASM)
221  #define X86_CPUID(type, out) \
222  asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
223  : "0" (type))
224 
225  #define X86_CPUID_SUBLEVEL(type, level, out) \
226  asm("cpuid\n\t" : "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]) \
227  : "0" (type), "2" (level))
228 
229 #elif defined(BOTAN_BUILD_COMPILER_IS_GCC) || defined(BOTAN_BUILD_COMPILER_IS_CLANG)
230  #define X86_CPUID(type, out) do { __get_cpuid(type, out, out+1, out+2, out+3); } while(0)
231 
232  #define X86_CPUID_SUBLEVEL(type, level, out) \
233  do { __cpuid_count(type, level, out[0], out[1], out[2], out[3]); } while(0)
234 #else
235  #warning "No way of calling x86 cpuid instruction for this compiler"
236  #define X86_CPUID(type, out) do { clear_mem(out, 4); } while(0)
237  #define X86_CPUID_SUBLEVEL(type, level, out) do { clear_mem(out, 4); } while(0)
238 #endif
239 
240  uint64_t features_detected = 0;
241  uint32_t cpuid[4] = { 0 };
242 
243  // CPUID 0: vendor identification, max sublevel
244  X86_CPUID(0, cpuid);
245 
246  const uint32_t max_supported_sublevel = cpuid[0];
247 
248  const uint32_t INTEL_CPUID[3] = { 0x756E6547, 0x6C65746E, 0x49656E69 };
249  const uint32_t AMD_CPUID[3] = { 0x68747541, 0x444D4163, 0x69746E65 };
250  const bool is_intel = same_mem(cpuid + 1, INTEL_CPUID, 3);
251  const bool is_amd = same_mem(cpuid + 1, AMD_CPUID, 3);
252 
253  if(max_supported_sublevel >= 1)
254  {
255  // CPUID 1: feature bits
256  X86_CPUID(1, cpuid);
257  const uint64_t flags0 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[3];
258 
259  enum x86_CPUID_1_bits : uint64_t {
260  RDTSC = (1ULL << 4),
261  SSE2 = (1ULL << 26),
262  CLMUL = (1ULL << 33),
263  SSSE3 = (1ULL << 41),
264  SSE41 = (1ULL << 51),
265  SSE42 = (1ULL << 52),
266  AESNI = (1ULL << 57),
267  RDRAND = (1ULL << 62)
268  };
269 
270  if(flags0 & x86_CPUID_1_bits::RDTSC)
271  features_detected |= CPUID::CPUID_RDTSC_BIT;
272  if(flags0 & x86_CPUID_1_bits::SSE2)
273  features_detected |= CPUID::CPUID_SSE2_BIT;
274  if(flags0 & x86_CPUID_1_bits::CLMUL)
275  features_detected |= CPUID::CPUID_CLMUL_BIT;
276  if(flags0 & x86_CPUID_1_bits::SSSE3)
277  features_detected |= CPUID::CPUID_SSSE3_BIT;
278  if(flags0 & x86_CPUID_1_bits::SSE41)
279  features_detected |= CPUID::CPUID_SSE41_BIT;
280  if(flags0 & x86_CPUID_1_bits::SSE42)
281  features_detected |= CPUID::CPUID_SSE42_BIT;
282  if(flags0 & x86_CPUID_1_bits::AESNI)
283  features_detected |= CPUID::CPUID_AESNI_BIT;
284  if(flags0 & x86_CPUID_1_bits::RDRAND)
285  features_detected |= CPUID::CPUID_RDRAND_BIT;
286  }
287 
288  if(is_intel)
289  {
290  // Intel cache line size is in cpuid(1) output
291  *cache_line_size = 8 * get_byte(2, cpuid[1]);
292  }
293  else if(is_amd)
294  {
295  // AMD puts it in vendor zone
296  X86_CPUID(0x80000005, cpuid);
297  *cache_line_size = get_byte(3, cpuid[2]);
298  }
299 
300  if(max_supported_sublevel >= 7)
301  {
302  clear_mem(cpuid, 4);
303  X86_CPUID_SUBLEVEL(7, 0, cpuid);
304 
305  enum x86_CPUID_7_bits : uint64_t {
306  AVX2 = (1ULL << 5),
307  BMI2 = (1ULL << 8),
308  AVX512F = (1ULL << 16),
309  RDSEED = (1ULL << 18),
310  ADX = (1ULL << 19),
311  SHA = (1ULL << 29),
312  };
313  uint64_t flags7 = (static_cast<uint64_t>(cpuid[2]) << 32) | cpuid[1];
314 
315  if(flags7 & x86_CPUID_7_bits::AVX2)
316  features_detected |= CPUID::CPUID_AVX2_BIT;
317  if(flags7 & x86_CPUID_7_bits::BMI2)
318  features_detected |= CPUID::CPUID_BMI2_BIT;
319  if(flags7 & x86_CPUID_7_bits::AVX512F)
320  features_detected |= CPUID::CPUID_AVX512F_BIT;
321  if(flags7 & x86_CPUID_7_bits::RDSEED)
322  features_detected |= CPUID::CPUID_RDSEED_BIT;
323  if(flags7 & x86_CPUID_7_bits::ADX)
324  features_detected |= CPUID::CPUID_ADX_BIT;
325  if(flags7 & x86_CPUID_7_bits::SHA)
326  features_detected |= CPUID::CPUID_SHA_BIT;
327  }
328 
329 #undef X86_CPUID
330 #undef X86_CPUID_SUBLEVEL
331 
332  /*
333  * If we don't have access to CPUID, we can still safely assume that
334  * any x86-64 processor has SSE2 and RDTSC
335  */
336 #if defined(BOTAN_TARGET_ARCH_IS_X86_64)
337  if(features_detected == 0)
338  {
339  features_detected |= CPUID::CPUID_SSE2_BIT;
340  features_detected |= CPUID::CPUID_RDTSC_BIT;
341  }
342 #endif
343 
344  return features_detected;
345  }
346 
347 #endif
348 
349 }
350 
352  {
353 #if defined(BOTAN_TARGET_SUPPORTS_SSE2)
354  return CPUID::has_sse2();
355 #elif defined(BOTAN_TARGET_SUPPORTS_ALTIVEC)
356  return CPUID::has_altivec();
357 #elif defined(BOTAN_TARGET_SUPPORTS_NEON)
358  return CPUID::has_neon();
359 #else
360  return true;
361 #endif
362  }
363 
364 //static
365 std::string CPUID::to_string()
366  {
367  std::vector<std::string> flags;
368 
369 #define CPUID_PRINT(flag) do { if(has_##flag()) { flags.push_back(#flag); } } while(0)
370 
371 #if defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
372  CPUID_PRINT(sse2);
373  CPUID_PRINT(ssse3);
374  CPUID_PRINT(sse41);
375  CPUID_PRINT(sse42);
376  CPUID_PRINT(avx2);
377  CPUID_PRINT(avx512f);
378 
379  CPUID_PRINT(rdtsc);
380  CPUID_PRINT(bmi2);
381  CPUID_PRINT(adx);
382 
383  CPUID_PRINT(aes_ni);
384  CPUID_PRINT(clmul);
385  CPUID_PRINT(rdrand);
386  CPUID_PRINT(rdseed);
387  CPUID_PRINT(intel_sha);
388 #endif
389 
390 #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
391  CPUID_PRINT(altivec);
392 #endif
393 
394 #if defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
395  CPUID_PRINT(neon);
396  CPUID_PRINT(arm_sha1);
397  CPUID_PRINT(arm_sha2);
398  CPUID_PRINT(arm_aes);
399  CPUID_PRINT(arm_pmull);
400 #endif
401 
402 #undef CPUID_PRINT
403 
404  return string_join(flags, ' ');
405  }
406 
407 //static
408 void CPUID::print(std::ostream& o)
409  {
410  o << "CPUID flags: " << CPUID::to_string() << "\n";
411  }
412 
414  {
415  g_processor_features = 0;
416 
417 #if defined(BOTAN_TARGET_CPU_IS_PPC_FAMILY)
418  g_processor_features = powerpc_detect_cpu_featutures();
419 #elif defined(BOTAN_TARGET_CPU_IS_ARM_FAMILY)
420  g_processor_features = arm_detect_cpu_features(&g_cache_line_size);
421 #elif defined(BOTAN_TARGET_CPU_IS_X86_FAMILY)
422  g_processor_features = x86_detect_cpu_features(&g_cache_line_size);
423 #endif
424 
425  g_processor_features |= CPUID::CPUID_INITIALIZED_BIT;
426 
427  // Check runtime endian
428  const uint32_t endian32 = 0x01234567;
429  const uint8_t* e8 = reinterpret_cast<const uint8_t*>(&endian32);
430 
431  if(e8[0] == 0x01 && e8[1] == 0x23 && e8[2] == 0x45 && e8[3] == 0x67)
432  {
433  g_little_endian = false;
434  }
435  else if(e8[0] == 0x67 && e8[1] == 0x45 && e8[2] == 0x23 && e8[3] == 0x01)
436  {
437  g_little_endian = true;
438  }
439  else
440  {
441  throw Internal_Error("Unexpected endian at runtime, neither big nor little");
442  }
443 
444  // If we were compiled with a known endian, verify it matches at runtime
445 #if defined(BOTAN_TARGET_CPU_IS_LITTLE_ENDIAN)
446  BOTAN_ASSERT(g_little_endian == true, "Build and runtime endian match");
447 #elif defined(BOTAN_TARGET_CPU_IS_BIG_ENDIAN)
448  BOTAN_ASSERT(g_little_endian == false, "Build and runtime endian match");
449 #endif
450 
451  }
452 
453 }
std::string string_join(const std::vector< std::string > &strs, char delim)
Definition: parsing.cpp:172
bool same_mem(const T *p1, const T *p2, size_t n)
Definition: mem_ops.h:98
void clear_mem(T *ptr, size_t n)
Definition: mem_ops.h:57
Flags flags(Flag flags)
Definition: p11.h:858
static bool has_simd_32()
Definition: cpuid.cpp:351
#define BOTAN_ASSERT(expr, assertion_made)
Definition: assert.h:27
static std::string to_string()
Definition: cpuid.cpp:365
Definition: alg_id.cpp:13
static void print(std::ostream &o)
Definition: cpuid.cpp:408
static void initialize()
Definition: cpuid.cpp:413
uint8_t get_byte(size_t byte_num, T input)
Definition: loadstor.h:47
#define CPUID_PRINT(flag)