#include <memory>
#include <vector>

#include "transpose.h"
192std::shared_ptr<hptt::Transpose<float> >
create_plan(
const int *perm,
const int dim,
193 const float alpha,
const float *A,
const int *sizeA,
const int *outerSizeA,
194 const float beta,
float *B,
const int *outerSizeB,
196 const int numThreads,
const int *threadIds =
nullptr,
const bool useRowMajor =
false);
198std::shared_ptr<hptt::Transpose<double> >
create_plan(
const int *perm,
const int dim,
199 const double alpha,
const double *A,
const int *sizeA,
const int *outerSizeA,
200 const double beta,
double *B,
const int *outerSizeB,
202 const int numThreads,
const int *threadIds =
nullptr,
const bool useRowMajor =
false);
204std::shared_ptr<hptt::Transpose<FloatComplex> >
create_plan(
const int *perm,
const int dim,
205 const FloatComplex alpha,
const FloatComplex *A,
const int *sizeA,
const int *outerSizeA,
206 const FloatComplex beta, FloatComplex *B,
const int *outerSizeB,
208 const int numThreads,
const int *threadIds =
nullptr,
const bool useRowMajor =
false);
210std::shared_ptr<hptt::Transpose<DoubleComplex> >
create_plan(
const int *perm,
const int dim,
211 const DoubleComplex alpha,
const DoubleComplex *A,
const int *sizeA,
const int *outerSizeA,
212 const DoubleComplex beta, DoubleComplex *B,
const int *outerSizeB,
214 const int numThreads,
const int *threadIds =
nullptr,
const bool useRowMajor =
false);
217std::shared_ptr<hptt::Transpose<float> >
create_plan(
const std::vector<int> &perm,
const int dim,
218 const float alpha,
const float *A,
const std::vector<int> &sizeA,
const std::vector<int> &outerSizeA,
219 const float beta,
float *B,
const std::vector<int> &outerSizeB,
221 const int numThreads,
const std::vector<int> &threadIds = {},
const bool useRowMajor =
false);
223std::shared_ptr<hptt::Transpose<double> >
create_plan(
const std::vector<int> &perm,
const int dim,
224 const double alpha,
const double *A,
const std::vector<int> &sizeA,
const std::vector<int> &outerSizeA,
225 const double beta,
double *B,
const std::vector<int> &outerSizeB,
227 const int numThreads,
const std::vector<int> &threadIds = {},
const bool useRowMajor =
false);
229std::shared_ptr<hptt::Transpose<FloatComplex> >
create_plan(
const std::vector<int> &perm,
const int dim,
230 const FloatComplex alpha,
const FloatComplex *A,
const std::vector<int> &sizeA,
const std::vector<int> &outerSizeA,
231 const FloatComplex beta, FloatComplex *B,
const std::vector<int> &outerSizeB,
233 const int numThreads,
const std::vector<int> &threadIds = {},
const bool useRowMajor =
false);
235std::shared_ptr<hptt::Transpose<DoubleComplex> >
create_plan(
const std::vector<int> &perm,
const int dim,
236 const DoubleComplex alpha,
const DoubleComplex *A,
const std::vector<int> &sizeA,
const std::vector<int> &outerSizeA,
237 const DoubleComplex beta, DoubleComplex *B,
const std::vector<int> &outerSizeB,
239 const int numThreads,
const std::vector<int> &threadIds = {},
const bool useRowMajor =
false);
243std::shared_ptr<hptt::Transpose<float> >
create_plan(
const int *perm,
const int dim,
244 const float alpha,
const float *A,
const int *sizeA,
const int *outerSizeA,
245 const float beta,
float *B,
const int *outerSizeB,
246 const int maxAutotuningCandidates,
247 const int numThreads,
const int *threadIds =
nullptr,
const bool useRowMajor =
false);
249std::shared_ptr<hptt::Transpose<double> >
create_plan(
const int *perm,
const int dim,
250 const double alpha,
const double *A,
const int *sizeA,
const int *outerSizeA,
251 const double beta,
double *B,
const int *outerSizeB,
252 const int maxAutotuningCandidates,
253 const int numThreads,
const int *threadIds =
nullptr,
const bool useRowMajor =
false);
255std::shared_ptr<hptt::Transpose<FloatComplex> >
create_plan(
const int *perm,
const int dim,
256 const FloatComplex alpha,
const FloatComplex *A,
const int *sizeA,
const int *outerSizeA,
257 const FloatComplex beta, FloatComplex *B,
const int *outerSizeB,
258 const int maxAutotuningCandidates,
259 const int numThreads,
const int *threadIds =
nullptr,
const bool useRowMajor =
false);
261std::shared_ptr<hptt::Transpose<DoubleComplex> >
create_plan(
const int *perm,
const int dim,
262 const DoubleComplex alpha,
const DoubleComplex *A,
const int *sizeA,
const int *outerSizeA,
263 const DoubleComplex beta, DoubleComplex *B,
const int *outerSizeB,
264 const int maxAutotuningCandidates,
265 const int numThreads,
const int *threadIds =
nullptr,
const bool useRowMajor =
false);
/**
 * \brief Tensor transpose entry point for float data using only built-in
 *        argument types (pointers, int, float).
 *
 * NOTE(review): parameter meanings below are inferred from the parameter
 * names only; confirm them against the HPTT documentation.
 *
 * \param[in]     perm        permutation array (presumably `dim` entries)
 * \param[in]     dim         presumably the number of tensor dimensions
 * \param[in]     alpha       scalar paired with the input A
 * \param[in]     A           read-only input data
 * \param[in]     sizeA       presumably the extent of A along each dimension
 * \param[in]     outerSizeA  presumably the extents of the allocation holding A
 * \param[in]     beta        scalar paired with the output B
 * \param[in,out] B           writable output data
 * \param[in]     outerSizeB  presumably the extents of the allocation holding B
 * \param[in]     numThreads  presumably the number of threads to use
 * \param[in]     useRowMajor integer layout flag; defaults to 0
 */
void sTensorTranspose(
    const int *perm,
    const int dim,
    const float alpha,
    const float *A,
    const int *sizeA,
    const int *outerSizeA,
    const float beta,
    float *B,
    const int *outerSizeB,
    const int numThreads,
    const int useRowMajor = 0);
/**
 * \brief Tensor transpose entry point for double data using only built-in
 *        argument types (pointers, int, double).
 *
 * NOTE(review): parameter meanings below are inferred from the parameter
 * names only; confirm them against the HPTT documentation.
 *
 * \param[in]     perm        permutation array (presumably `dim` entries)
 * \param[in]     dim         presumably the number of tensor dimensions
 * \param[in]     alpha       scalar paired with the input A
 * \param[in]     A           read-only input data
 * \param[in]     sizeA       presumably the extent of A along each dimension
 * \param[in]     outerSizeA  presumably the extents of the allocation holding A
 * \param[in]     beta        scalar paired with the output B
 * \param[in,out] B           writable output data
 * \param[in]     outerSizeB  presumably the extents of the allocation holding B
 * \param[in]     numThreads  presumably the number of threads to use
 * \param[in]     useRowMajor integer layout flag; defaults to 0
 */
void dTensorTranspose(
    const int *perm,
    const int dim,
    const double alpha,
    const double *A,
    const int *sizeA,
    const int *outerSizeA,
    const double beta,
    double *B,
    const int *outerSizeB,
    const int numThreads,
    const int useRowMajor = 0);
/**
 * \brief Tensor transpose entry point for `float _Complex` data.
 *
 * Unlike the real-valued s/d variants, this one takes an additional `conjA`
 * flag between alpha and A.
 *
 * NOTE(review): parameter meanings below are inferred from the parameter
 * names only; confirm them against the HPTT documentation.
 *
 * \param[in]     perm        permutation array (presumably `dim` entries)
 * \param[in]     dim         presumably the number of tensor dimensions
 * \param[in]     alpha       scalar paired with the input A
 * \param[in]     conjA       presumably requests complex conjugation of A
 * \param[in]     A           read-only input data
 * \param[in]     sizeA       presumably the extent of A along each dimension
 * \param[in]     outerSizeA  presumably the extents of the allocation holding A
 * \param[in]     beta        scalar paired with the output B
 * \param[in,out] B           writable output data
 * \param[in]     outerSizeB  presumably the extents of the allocation holding B
 * \param[in]     numThreads  presumably the number of threads to use
 * \param[in]     useRowMajor integer layout flag; defaults to 0
 */
void cTensorTranspose(
    const int *perm,
    const int dim,
    const float _Complex alpha,
    bool conjA,
    const float _Complex *A,
    const int *sizeA,
    const int *outerSizeA,
    const float _Complex beta,
    float _Complex *B,
    const int *outerSizeB,
    const int numThreads,
    const int useRowMajor = 0);
/**
 * \brief Tensor transpose entry point for `double _Complex` data.
 *
 * Unlike the real-valued s/d variants, this one takes an additional `conjA`
 * flag between alpha and A.
 *
 * NOTE(review): parameter meanings below are inferred from the parameter
 * names only; confirm them against the HPTT documentation.
 *
 * \param[in]     perm        permutation array (presumably `dim` entries)
 * \param[in]     dim         presumably the number of tensor dimensions
 * \param[in]     alpha       scalar paired with the input A
 * \param[in]     conjA       presumably requests complex conjugation of A
 * \param[in]     A           read-only input data
 * \param[in]     sizeA       presumably the extent of A along each dimension
 * \param[in]     outerSizeA  presumably the extents of the allocation holding A
 * \param[in]     beta        scalar paired with the output B
 * \param[in,out] B           writable output data
 * \param[in]     outerSizeB  presumably the extents of the allocation holding B
 * \param[in]     numThreads  presumably the number of threads to use
 * \param[in]     useRowMajor integer layout flag; defaults to 0
 */
void zTensorTranspose(
    const int *perm,
    const int dim,
    const double _Complex alpha,
    bool conjA,
    const double _Complex *A,
    const int *sizeA,
    const int *outerSizeA,
    const double _Complex beta,
    double _Complex *B,
    const int *outerSizeB,
    const int numThreads,
    const int useRowMajor = 0);
// Cross-reference notes carried over from the generated documentation:
//
//   Definition: compute_node.h:3
//
//   SelectionMethod
//     Determines the duration of the auto-tuning process.
//     Definition: hptt_types.h:22
//
//   std::shared_ptr< hptt::Transpose< float > > create_plan(const int *perm, const int dim, const float alpha, const float *A, const int *sizeA, const int *outerSizeA, const float beta, float *B, const int *outerSizeB, const SelectionMethod selectionMethod, const int numThreads, const int *threadIds=nullptr, const bool useRowMajor=false)
//     Creates a Tensor Transposition plan.