High-Performance Tensor Transposition (HPTT) C++ Library
A C++ library for high-performance multi-threaded tensor transpositions.
hptt Namespace Reference

Classes

class  ComputeNode
 A ComputeNode encodes a loop.
 
class  Plan
 A plan encodes the execution of a tensor transposition.
 
class  Transpose
 The Transpose class encodes all information related to the execution of the tensor transposition. More...
 

Typedefs

using FloatComplex = std::complex< float >
 
using DoubleComplex = std::complex< double >
 

Enumerations

enum  SelectionMethod { ESTIMATE , MEASURE , PATIENT , CRAZY }
 Determines the duration of the auto-tuning process. More...
 

Functions

std::shared_ptr< hptt::Transpose< float > > create_plan (const int *perm, const int dim, const float alpha, const float *A, const int *sizeA, const int *outerSizeA, const float beta, float *B, const int *outerSizeB, const SelectionMethod selectionMethod, const int numThreads, const int *threadIds=nullptr, const bool useRowMajor=false)
 Creates a Tensor Transposition plan. More...
 
std::shared_ptr< hptt::Transpose< double > > create_plan (const int *perm, const int dim, const double alpha, const double *A, const int *sizeA, const int *outerSizeA, const double beta, double *B, const int *outerSizeB, const SelectionMethod selectionMethod, const int numThreads, const int *threadIds=nullptr, const bool useRowMajor=false)
 
std::shared_ptr< hptt::Transpose< FloatComplex > > create_plan (const int *perm, const int dim, const FloatComplex alpha, const FloatComplex *A, const int *sizeA, const int *outerSizeA, const FloatComplex beta, FloatComplex *B, const int *outerSizeB, const SelectionMethod selectionMethod, const int numThreads, const int *threadIds=nullptr, const bool useRowMajor=false)
 
std::shared_ptr< hptt::Transpose< DoubleComplex > > create_plan (const int *perm, const int dim, const DoubleComplex alpha, const DoubleComplex *A, const int *sizeA, const int *outerSizeA, const DoubleComplex beta, DoubleComplex *B, const int *outerSizeB, const SelectionMethod selectionMethod, const int numThreads, const int *threadIds=nullptr, const bool useRowMajor=false)
 
std::shared_ptr< hptt::Transpose< float > > create_plan (const std::vector< int > &perm, const int dim, const float alpha, const float *A, const std::vector< int > &sizeA, const std::vector< int > &outerSizeA, const float beta, float *B, const std::vector< int > &outerSizeB, const SelectionMethod selectionMethod, const int numThreads, const std::vector< int > &threadIds={}, const bool useRowMajor=false)
 
std::shared_ptr< hptt::Transpose< double > > create_plan (const std::vector< int > &perm, const int dim, const double alpha, const double *A, const std::vector< int > &sizeA, const std::vector< int > &outerSizeA, const double beta, double *B, const std::vector< int > &outerSizeB, const SelectionMethod selectionMethod, const int numThreads, const std::vector< int > &threadIds={}, const bool useRowMajor=false)
 
std::shared_ptr< hptt::Transpose< FloatComplex > > create_plan (const std::vector< int > &perm, const int dim, const FloatComplex alpha, const FloatComplex *A, const std::vector< int > &sizeA, const std::vector< int > &outerSizeA, const FloatComplex beta, FloatComplex *B, const std::vector< int > &outerSizeB, const SelectionMethod selectionMethod, const int numThreads, const std::vector< int > &threadIds={}, const bool useRowMajor=false)
 
std::shared_ptr< hptt::Transpose< DoubleComplex > > create_plan (const std::vector< int > &perm, const int dim, const DoubleComplex alpha, const DoubleComplex *A, const std::vector< int > &sizeA, const std::vector< int > &outerSizeA, const DoubleComplex beta, DoubleComplex *B, const std::vector< int > &outerSizeB, const SelectionMethod selectionMethod, const int numThreads, const std::vector< int > &threadIds={}, const bool useRowMajor=false)
 
std::shared_ptr< hptt::Transpose< float > > create_plan (const int *perm, const int dim, const float alpha, const float *A, const int *sizeA, const int *outerSizeA, const float beta, float *B, const int *outerSizeB, const int maxAutotuningCandidates, const int numThreads, const int *threadIds=nullptr, const bool useRowMajor=false)
 
std::shared_ptr< hptt::Transpose< double > > create_plan (const int *perm, const int dim, const double alpha, const double *A, const int *sizeA, const int *outerSizeA, const double beta, double *B, const int *outerSizeB, const int maxAutotuningCandidates, const int numThreads, const int *threadIds=nullptr, const bool useRowMajor=false)
 
std::shared_ptr< hptt::Transpose< FloatComplex > > create_plan (const int *perm, const int dim, const FloatComplex alpha, const FloatComplex *A, const int *sizeA, const int *outerSizeA, const FloatComplex beta, FloatComplex *B, const int *outerSizeB, const int maxAutotuningCandidates, const int numThreads, const int *threadIds=nullptr, const bool useRowMajor=false)
 
std::shared_ptr< hptt::Transpose< DoubleComplex > > create_plan (const int *perm, const int dim, const DoubleComplex alpha, const DoubleComplex *A, const int *sizeA, const int *outerSizeA, const DoubleComplex beta, DoubleComplex *B, const int *outerSizeB, const int maxAutotuningCandidates, const int numThreads, const int *threadIds=nullptr, const bool useRowMajor=false)
 
template<>
float conj (float x)
 
template<>
double conj (double x)
 
template<>
double getZeroThreshold< double > ()
 
template<>
double getZeroThreshold< DoubleComplex > ()
 
template<>
double getZeroThreshold< float > ()
 
template<>
double getZeroThreshold< FloatComplex > ()
 
void trashCache (double *A, double *B, int n)
 
template<typename t >
int hasItem (const std::vector< t > &vec, t value)
 
template<typename t >
void printVector (const std::vector< t > &vec, const char *label)
 
template<typename t >
void printVector (const std::list< t > &vec, const char *label)
 
void getPrimeFactors (int n, std::list< int > &primeFactors)
 
template<typename t >
int findPos (t value, const std::vector< t > &array)
 
int findPos (int value, const int *array, int n)
 
int factorial (int n)
 
void accountForRowMajor (const int *sizeA, const int *outerSizeA, const int *outerSizeB, const int *perm, int *tmpSizeA, int *tmpOuterSizeA, int *tmpouterSizeB, int *tmpPerm, const int dim, const bool useRowMajor)
 

Detailed Description

Enumeration Type Documentation

◆ SelectionMethod

Determines the duration of the auto-tuning process.

  • ESTIMATE: 0 seconds (i.e., no auto-tuning)
  • MEASURE: 10 seconds
  • PATIENT: 60 seconds
  • CRAZY: 3600 seconds

Function Documentation

◆ create_plan()

std::shared_ptr< hptt::Transpose< float > > hptt::create_plan ( const int *  perm,
const int  dim,
const float  alpha,
const float *  A,
const int *  sizeA,
const int *  outerSizeA,
const float  beta,
float *  B,
const int *  outerSizeB,
const SelectionMethod  selectionMethod,
const int  numThreads,
const int *  threadIds = nullptr,
const bool  useRowMajor = false 
)

Creates a Tensor Transposition plan.

A tensor transposition plan is a data structure that encodes the execution of the tensor transposition. HPTT supports tensor transpositions of the form:

\[ B_{\pi(i_0,i_1,...)} = \alpha * A_{i_0,i_1,...} + \beta * B_{\pi(i_0,i_1,...)}. \]

The plan can be reused over several transpositions.

Parameters
[in] perm — dim-dimensional array representing the permutation of the indices.
  • For instance, perm[] = {1,0,2} denotes the following transposition: $B_{i_1,i_0,i_2} \gets A_{i_0,i_1,i_2}$.
[in] dim — Dimensionality of the tensors.
[in] alpha — Scaling factor for A.
[in] A — Pointer to the raw data of the input tensor A.
[in] sizeA — dim-dimensional array that stores the size of each dimension of A.
[in] outerSizeA — dim-dimensional array that stores the outer size of each dimension of A.
  • This parameter may be NULL, indicating that the outer-size is equal to sizeA.
  • If outerSizeA is not NULL, outerSizeA[i] >= sizeA[i] for all 0 <= i < dim must hold.
  • This option enables HPTT to operate on sub-tensors.
[in] beta — Scaling factor for B.
[in,out] B — Pointer to the raw data of the output tensor B.
[in] outerSizeB — dim-dimensional array that stores the outer size of each dimension of B.
  • This parameter may be NULL, indicating that the outer-size is equal to the perm(sizeA).
  • If outerSizeB is not NULL, outerSizeB[i] >= perm(sizeA)[i] for all 0 <= i < dim must hold.
  • This option enables HPTT to operate on sub-tensors.
[in] selectionMethod — Determines if auto-tuning should be used. See hptt::SelectionMethod for details. ATTENTION: If you enable auto-tuning (e.g., hptt::MEASURE), the output data will be used during the auto-tuning process. The original data (i.e., A and B) is nevertheless preserved once this function call completes, unless the input data (i.e., A) contains invalid values (e.g., NaN, inf).
[in] numThreads — Number of threads that participate in this tensor transposition.
[in] threadIds — Array of OpenMP thread IDs that participate in this tensor transposition. This parameter is only important if you want to call HPTT from within a parallel region (i.e., via execute_expert()).
[in] useRowMajor — This flag indicates whether a row-major memory layout should be used (default: off = column-major).