/*
 * High-Performance Tensor Transposition (HPTT) C++ Library
 * A C++ library for high-performance multi-threaded tensor transpositions.
 *
 * macros.h
 */
#pragma once
21
/*
 * HPTT_ERROR_INFO(str)
 *
 * Debug-only diagnostic.
 *
 * When DEBUG is defined, prints "[INFO] <file>:<line> : <str>" to stdout and
 * then terminates the process with exit(-1). When DEBUG is not defined the
 * macro does nothing and `str` is not evaluated.
 *
 * str: NUL-terminated string; it is passed to a "%s" conversion.
 *
 * Both variants are wrapped in do { ... } while (0) so that an invocation is
 * exactly one statement. Without the wrapper, `if (c) HPTT_ERROR_INFO("x");`
 * would guard only the fprintf() and call exit() unconditionally, and the
 * macro could not be followed by an `else`. Invocations must be terminated
 * with a semicolon.
 *
 * The DEBUG variant uses fprintf() and exit(); <stdio.h> and <stdlib.h> (or
 * their C++ equivalents) must be included before the macro is used.
 */
#ifdef DEBUG
#define HPTT_ERROR_INFO(str) \
   do { \
      fprintf(stdout, "[INFO] %s:%d : %s\n", __FILE__, __LINE__, (str)); \
      exit(-1); \
   } while (0)
#else
#define HPTT_ERROR_INFO(str) do { } while (0)
#endif
27
/*
 * INLINE
 *
 * Function specifier that requests forced inlining where the compiler has a
 * spelling for it:
 *   - Intel compilers:         __forceinline
 *   - GCC-compatible compilers: __attribute__((always_inline)) inline
 *   - MSVC:                    __forceinline
 *   - anything else:           plain `inline`
 *
 * The last two branches are tested only after the Intel and GNU checks, so
 * compilers that matched one of those keep the same expansion as before. The
 * final fallback guarantees INLINE is always defined; without it, code using
 * INLINE fails to compile on compilers that match none of the checks.
 */
#if defined(__ICC) || defined(__INTEL_COMPILER)
#define INLINE __forceinline
#elif defined(__GNUC__) || defined(__GNUG__)
#define INLINE __attribute__((always_inline)) inline
#elif defined(_MSC_VER)
#define INLINE __forceinline
#else
#define INLINE inline
#endif
33
/*
 * HPTT_DUPLICATE(condition, ...)
 * HPTT_DUPLICATE_2(condition, ...)
 *
 * Emit the code passed as the variadic arguments so that it can run either
 * under an OpenMP "parallel for" or serially.
 *
 * With _OPENMP defined, the code is emitted twice inside an if/else:
 *   - `condition` true:  the code is preceded by
 *       #pragma omp parallel for num_threads(numThreads)
 *     (HPTT_DUPLICATE_2 additionally appends collapse(2));
 *   - `condition` false: the same code without any pragma.
 * The pragma refers to `numThreads`, which these macros do not declare; a
 * variable of that name must be visible where the macro is expanded. Because
 * the pragma is placed directly in front of the code, the code has to start
 * with a for loop (two nested loops for the collapse(2) variant).
 *
 * Without _OPENMP, both macros expand to the code wrapped in braces and
 * `condition` is not evaluated at all.
 *
 * Every expansion is already a complete statement, so an invocation does not
 * need a trailing semicolon.
 */
#ifdef _OPENMP

#define HPTT_DUPLICATE_2(condition, ...) \
if (condition) { _Pragma("omp parallel for num_threads(numThreads) collapse(2)") \
   __VA_ARGS__ } \
else { __VA_ARGS__ }

#define HPTT_DUPLICATE(condition, ...) \
if (condition) { _Pragma("omp parallel for num_threads(numThreads)") \
   __VA_ARGS__ } \
else { __VA_ARGS__ }

#else

#define HPTT_DUPLICATE(condition, ...) { __VA_ARGS__ }
#define HPTT_DUPLICATE_2(condition, ...) { __VA_ARGS__ }

#endif