High-Performance Tensor Transposition (HPTT) C++ Library
A C++ library for high-performance multi-threaded tensor transpositions.
hptt.h
1/*
2 * Copyright (C) 2017 Paul Springer (springer@aices.rwth-aachen.de)
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU Lesser General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
141#pragma once
142
143#include <vector>
144#include <memory>
145
146#ifdef _OPENMP
147#include <omp.h>
148#endif
149
150
151#include "transpose.h"
152
153
154namespace hptt {
155
/**
 * Creates a tensor transposition plan for float data (raw-pointer overload,
 * auto-tuning controlled by a SelectionMethod).
 *
 * NOTE(review): apart from selectionMethod, parameter meanings are inferred
 * from their names only; the original API comments are not visible in this
 * listing — confirm against the library documentation.
 *
 * @param perm, dim        presumably the index permutation and the number of dimensions
 * @param alpha, A         presumably the input scaling factor and the input tensor
 * @param sizeA            presumably the extent of each dimension of A
 * @param outerSizeA       presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B          presumably the output scaling factor and the output tensor
 * @param outerSizeB       presumably the extents of the allocation enclosing B — TODO confirm
 * @param selectionMethod  determines the duration of the auto-tuning process
 * @param numThreads       presumably the number of threads the plan may use
 * @param threadIds        optional, defaults to nullptr; meaning of the ids — TODO confirm
 * @param useRowMajor      optional, defaults to false; presumably selects row-major layout
 * @return shared pointer to the created hptt::Transpose<float> plan
 */
192std::shared_ptr<hptt::Transpose<float> > create_plan( const int *perm, const int dim,
193 const float alpha, const float *A, const int *sizeA, const int *outerSizeA,
194 const float beta, float *B, const int *outerSizeB,
195 const SelectionMethod selectionMethod,
196 const int numThreads, const int *threadIds = nullptr, const bool useRowMajor = false);
197
/**
 * Creates a tensor transposition plan for double data (raw-pointer overload,
 * auto-tuning controlled by a SelectionMethod).
 *
 * NOTE(review): apart from selectionMethod, parameter meanings are inferred
 * from their names only; the original API comments are not visible in this
 * listing — confirm against the library documentation.
 *
 * @param perm, dim        presumably the index permutation and the number of dimensions
 * @param alpha, A         presumably the input scaling factor and the input tensor
 * @param sizeA            presumably the extent of each dimension of A
 * @param outerSizeA       presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B          presumably the output scaling factor and the output tensor
 * @param outerSizeB       presumably the extents of the allocation enclosing B — TODO confirm
 * @param selectionMethod  determines the duration of the auto-tuning process
 * @param numThreads       presumably the number of threads the plan may use
 * @param threadIds        optional, defaults to nullptr; meaning of the ids — TODO confirm
 * @param useRowMajor      optional, defaults to false; presumably selects row-major layout
 * @return shared pointer to the created hptt::Transpose<double> plan
 */
198std::shared_ptr<hptt::Transpose<double> > create_plan( const int *perm, const int dim,
199 const double alpha, const double *A, const int *sizeA, const int *outerSizeA,
200 const double beta, double *B, const int *outerSizeB,
201 const SelectionMethod selectionMethod,
202 const int numThreads, const int *threadIds = nullptr, const bool useRowMajor = false);
203
/**
 * Creates a tensor transposition plan for FloatComplex data (raw-pointer
 * overload, auto-tuning controlled by a SelectionMethod).
 *
 * NOTE(review): apart from selectionMethod, parameter meanings are inferred
 * from their names only; the original API comments are not visible in this
 * listing — confirm against the library documentation.
 *
 * @param perm, dim        presumably the index permutation and the number of dimensions
 * @param alpha, A         presumably the input scaling factor and the input tensor
 * @param sizeA            presumably the extent of each dimension of A
 * @param outerSizeA       presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B          presumably the output scaling factor and the output tensor
 * @param outerSizeB       presumably the extents of the allocation enclosing B — TODO confirm
 * @param selectionMethod  determines the duration of the auto-tuning process
 * @param numThreads       presumably the number of threads the plan may use
 * @param threadIds        optional, defaults to nullptr; meaning of the ids — TODO confirm
 * @param useRowMajor      optional, defaults to false; presumably selects row-major layout
 * @return shared pointer to the created hptt::Transpose<FloatComplex> plan
 */
204std::shared_ptr<hptt::Transpose<FloatComplex> > create_plan( const int *perm, const int dim,
205 const FloatComplex alpha, const FloatComplex *A, const int *sizeA, const int *outerSizeA,
206 const FloatComplex beta, FloatComplex *B, const int *outerSizeB,
207 const SelectionMethod selectionMethod,
208 const int numThreads, const int *threadIds = nullptr, const bool useRowMajor = false);
209
/**
 * Creates a tensor transposition plan for DoubleComplex data (raw-pointer
 * overload, auto-tuning controlled by a SelectionMethod).
 *
 * NOTE(review): apart from selectionMethod, parameter meanings are inferred
 * from their names only; the original API comments are not visible in this
 * listing — confirm against the library documentation.
 *
 * @param perm, dim        presumably the index permutation and the number of dimensions
 * @param alpha, A         presumably the input scaling factor and the input tensor
 * @param sizeA            presumably the extent of each dimension of A
 * @param outerSizeA       presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B          presumably the output scaling factor and the output tensor
 * @param outerSizeB       presumably the extents of the allocation enclosing B — TODO confirm
 * @param selectionMethod  determines the duration of the auto-tuning process
 * @param numThreads       presumably the number of threads the plan may use
 * @param threadIds        optional, defaults to nullptr; meaning of the ids — TODO confirm
 * @param useRowMajor      optional, defaults to false; presumably selects row-major layout
 * @return shared pointer to the created hptt::Transpose<DoubleComplex> plan
 */
210std::shared_ptr<hptt::Transpose<DoubleComplex> > create_plan( const int *perm, const int dim,
211 const DoubleComplex alpha, const DoubleComplex *A, const int *sizeA, const int *outerSizeA,
212 const DoubleComplex beta, DoubleComplex *B, const int *outerSizeB,
213 const SelectionMethod selectionMethod,
214 const int numThreads, const int *threadIds = nullptr, const bool useRowMajor = false);
215
216
/**
 * Creates a tensor transposition plan for float data (std::vector overload,
 * auto-tuning controlled by a SelectionMethod).
 *
 * The permutation, sizes and thread ids are taken as std::vector<int>
 * references, while the tensor data A and B remain raw pointers.
 *
 * NOTE(review): apart from selectionMethod, parameter meanings are inferred
 * from their names only; the original API comments are not visible in this
 * listing — confirm against the library documentation.
 *
 * @param perm, dim        presumably the index permutation and the number of dimensions
 * @param alpha, A         presumably the input scaling factor and the input tensor
 * @param sizeA            presumably the extent of each dimension of A
 * @param outerSizeA       presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B          presumably the output scaling factor and the output tensor
 * @param outerSizeB       presumably the extents of the allocation enclosing B — TODO confirm
 * @param selectionMethod  determines the duration of the auto-tuning process
 * @param numThreads       presumably the number of threads the plan may use
 * @param threadIds        optional, defaults to an empty vector; meaning of the ids — TODO confirm
 * @param useRowMajor      optional, defaults to false; presumably selects row-major layout
 * @return shared pointer to the created hptt::Transpose<float> plan
 */
217std::shared_ptr<hptt::Transpose<float> > create_plan( const std::vector<int> &perm, const int dim,
218 const float alpha, const float *A, const std::vector<int> &sizeA, const std::vector<int> &outerSizeA,
219 const float beta, float *B, const std::vector<int> &outerSizeB,
220 const SelectionMethod selectionMethod,
221 const int numThreads, const std::vector<int> &threadIds = {}, const bool useRowMajor = false);
222
/**
 * Creates a tensor transposition plan for double data (std::vector overload,
 * auto-tuning controlled by a SelectionMethod).
 *
 * The permutation, sizes and thread ids are taken as std::vector<int>
 * references, while the tensor data A and B remain raw pointers.
 *
 * NOTE(review): apart from selectionMethod, parameter meanings are inferred
 * from their names only; the original API comments are not visible in this
 * listing — confirm against the library documentation.
 *
 * @param perm, dim        presumably the index permutation and the number of dimensions
 * @param alpha, A         presumably the input scaling factor and the input tensor
 * @param sizeA            presumably the extent of each dimension of A
 * @param outerSizeA       presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B          presumably the output scaling factor and the output tensor
 * @param outerSizeB       presumably the extents of the allocation enclosing B — TODO confirm
 * @param selectionMethod  determines the duration of the auto-tuning process
 * @param numThreads       presumably the number of threads the plan may use
 * @param threadIds        optional, defaults to an empty vector; meaning of the ids — TODO confirm
 * @param useRowMajor      optional, defaults to false; presumably selects row-major layout
 * @return shared pointer to the created hptt::Transpose<double> plan
 */
223std::shared_ptr<hptt::Transpose<double> > create_plan( const std::vector<int> &perm, const int dim,
224 const double alpha, const double *A, const std::vector<int> &sizeA, const std::vector<int> &outerSizeA,
225 const double beta, double *B, const std::vector<int> &outerSizeB,
226 const SelectionMethod selectionMethod,
227 const int numThreads, const std::vector<int> &threadIds = {}, const bool useRowMajor = false);
228
/**
 * Creates a tensor transposition plan for FloatComplex data (std::vector
 * overload, auto-tuning controlled by a SelectionMethod).
 *
 * The permutation, sizes and thread ids are taken as std::vector<int>
 * references, while the tensor data A and B remain raw pointers.
 *
 * NOTE(review): apart from selectionMethod, parameter meanings are inferred
 * from their names only; the original API comments are not visible in this
 * listing — confirm against the library documentation.
 *
 * @param perm, dim        presumably the index permutation and the number of dimensions
 * @param alpha, A         presumably the input scaling factor and the input tensor
 * @param sizeA            presumably the extent of each dimension of A
 * @param outerSizeA       presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B          presumably the output scaling factor and the output tensor
 * @param outerSizeB       presumably the extents of the allocation enclosing B — TODO confirm
 * @param selectionMethod  determines the duration of the auto-tuning process
 * @param numThreads       presumably the number of threads the plan may use
 * @param threadIds        optional, defaults to an empty vector; meaning of the ids — TODO confirm
 * @param useRowMajor      optional, defaults to false; presumably selects row-major layout
 * @return shared pointer to the created hptt::Transpose<FloatComplex> plan
 */
229std::shared_ptr<hptt::Transpose<FloatComplex> > create_plan( const std::vector<int> &perm, const int dim,
230 const FloatComplex alpha, const FloatComplex *A, const std::vector<int> &sizeA, const std::vector<int> &outerSizeA,
231 const FloatComplex beta, FloatComplex *B, const std::vector<int> &outerSizeB,
232 const SelectionMethod selectionMethod,
233 const int numThreads, const std::vector<int> &threadIds = {}, const bool useRowMajor = false);
234
/**
 * Creates a tensor transposition plan for DoubleComplex data (std::vector
 * overload, auto-tuning controlled by a SelectionMethod).
 *
 * The permutation, sizes and thread ids are taken as std::vector<int>
 * references, while the tensor data A and B remain raw pointers.
 *
 * NOTE(review): apart from selectionMethod, parameter meanings are inferred
 * from their names only; the original API comments are not visible in this
 * listing — confirm against the library documentation.
 *
 * @param perm, dim        presumably the index permutation and the number of dimensions
 * @param alpha, A         presumably the input scaling factor and the input tensor
 * @param sizeA            presumably the extent of each dimension of A
 * @param outerSizeA       presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B          presumably the output scaling factor and the output tensor
 * @param outerSizeB       presumably the extents of the allocation enclosing B — TODO confirm
 * @param selectionMethod  determines the duration of the auto-tuning process
 * @param numThreads       presumably the number of threads the plan may use
 * @param threadIds        optional, defaults to an empty vector; meaning of the ids — TODO confirm
 * @param useRowMajor      optional, defaults to false; presumably selects row-major layout
 * @return shared pointer to the created hptt::Transpose<DoubleComplex> plan
 */
235std::shared_ptr<hptt::Transpose<DoubleComplex> > create_plan( const std::vector<int> &perm, const int dim,
236 const DoubleComplex alpha, const DoubleComplex *A, const std::vector<int> &sizeA, const std::vector<int> &outerSizeA,
237 const DoubleComplex beta, DoubleComplex *B, const std::vector<int> &outerSizeB,
238 const SelectionMethod selectionMethod,
239 const int numThreads, const std::vector<int> &threadIds = {}, const bool useRowMajor = false);
240
241
242
/**
 * Creates a tensor transposition plan for float data (raw-pointer overload
 * taking an integer maxAutotuningCandidates instead of a SelectionMethod).
 *
 * NOTE(review): parameter meanings are inferred from their names only; the
 * original API comments are not visible in this listing — confirm against the
 * library documentation.
 *
 * @param perm, dim                presumably the index permutation and the number of dimensions
 * @param alpha, A                 presumably the input scaling factor and the input tensor
 * @param sizeA                    presumably the extent of each dimension of A
 * @param outerSizeA               presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B                  presumably the output scaling factor and the output tensor
 * @param outerSizeB               presumably the extents of the allocation enclosing B — TODO confirm
 * @param maxAutotuningCandidates  presumably an upper bound on the candidates tried while auto-tuning — TODO confirm
 * @param numThreads               presumably the number of threads the plan may use
 * @param threadIds                optional, defaults to nullptr; meaning of the ids — TODO confirm
 * @param useRowMajor              optional, defaults to false; presumably selects row-major layout
 * @return shared pointer to the created hptt::Transpose<float> plan
 */
243std::shared_ptr<hptt::Transpose<float> > create_plan( const int *perm, const int dim,
244 const float alpha, const float *A, const int *sizeA, const int *outerSizeA,
245 const float beta, float *B, const int *outerSizeB,
246 const int maxAutotuningCandidates,
247 const int numThreads, const int *threadIds = nullptr, const bool useRowMajor = false);
248
/**
 * Creates a tensor transposition plan for double data (raw-pointer overload
 * taking an integer maxAutotuningCandidates instead of a SelectionMethod).
 *
 * NOTE(review): parameter meanings are inferred from their names only; the
 * original API comments are not visible in this listing — confirm against the
 * library documentation.
 *
 * @param perm, dim                presumably the index permutation and the number of dimensions
 * @param alpha, A                 presumably the input scaling factor and the input tensor
 * @param sizeA                    presumably the extent of each dimension of A
 * @param outerSizeA               presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B                  presumably the output scaling factor and the output tensor
 * @param outerSizeB               presumably the extents of the allocation enclosing B — TODO confirm
 * @param maxAutotuningCandidates  presumably an upper bound on the candidates tried while auto-tuning — TODO confirm
 * @param numThreads               presumably the number of threads the plan may use
 * @param threadIds                optional, defaults to nullptr; meaning of the ids — TODO confirm
 * @param useRowMajor              optional, defaults to false; presumably selects row-major layout
 * @return shared pointer to the created hptt::Transpose<double> plan
 */
249std::shared_ptr<hptt::Transpose<double> > create_plan( const int *perm, const int dim,
250 const double alpha, const double *A, const int *sizeA, const int *outerSizeA,
251 const double beta, double *B, const int *outerSizeB,
252 const int maxAutotuningCandidates,
253 const int numThreads, const int *threadIds = nullptr, const bool useRowMajor = false);
254
/**
 * Creates a tensor transposition plan for FloatComplex data (raw-pointer
 * overload taking an integer maxAutotuningCandidates instead of a
 * SelectionMethod).
 *
 * NOTE(review): parameter meanings are inferred from their names only; the
 * original API comments are not visible in this listing — confirm against the
 * library documentation.
 *
 * @param perm, dim                presumably the index permutation and the number of dimensions
 * @param alpha, A                 presumably the input scaling factor and the input tensor
 * @param sizeA                    presumably the extent of each dimension of A
 * @param outerSizeA               presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B                  presumably the output scaling factor and the output tensor
 * @param outerSizeB               presumably the extents of the allocation enclosing B — TODO confirm
 * @param maxAutotuningCandidates  presumably an upper bound on the candidates tried while auto-tuning — TODO confirm
 * @param numThreads               presumably the number of threads the plan may use
 * @param threadIds                optional, defaults to nullptr; meaning of the ids — TODO confirm
 * @param useRowMajor              optional, defaults to false; presumably selects row-major layout
 * @return shared pointer to the created hptt::Transpose<FloatComplex> plan
 */
255std::shared_ptr<hptt::Transpose<FloatComplex> > create_plan( const int *perm, const int dim,
256 const FloatComplex alpha, const FloatComplex *A, const int *sizeA, const int *outerSizeA,
257 const FloatComplex beta, FloatComplex *B, const int *outerSizeB,
258 const int maxAutotuningCandidates,
259 const int numThreads, const int *threadIds = nullptr, const bool useRowMajor = false);
260
/**
 * Creates a tensor transposition plan for DoubleComplex data (raw-pointer
 * overload taking an integer maxAutotuningCandidates instead of a
 * SelectionMethod).
 *
 * NOTE(review): parameter meanings are inferred from their names only; the
 * original API comments are not visible in this listing — confirm against the
 * library documentation.
 *
 * @param perm, dim                presumably the index permutation and the number of dimensions
 * @param alpha, A                 presumably the input scaling factor and the input tensor
 * @param sizeA                    presumably the extent of each dimension of A
 * @param outerSizeA               presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B                  presumably the output scaling factor and the output tensor
 * @param outerSizeB               presumably the extents of the allocation enclosing B — TODO confirm
 * @param maxAutotuningCandidates  presumably an upper bound on the candidates tried while auto-tuning — TODO confirm
 * @param numThreads               presumably the number of threads the plan may use
 * @param threadIds                optional, defaults to nullptr; meaning of the ids — TODO confirm
 * @param useRowMajor              optional, defaults to false; presumably selects row-major layout
 * @return shared pointer to the created hptt::Transpose<DoubleComplex> plan
 */
261std::shared_ptr<hptt::Transpose<DoubleComplex> > create_plan( const int *perm, const int dim,
262 const DoubleComplex alpha, const DoubleComplex *A, const int *sizeA, const int *outerSizeA,
263 const DoubleComplex beta, DoubleComplex *B, const int *outerSizeB,
264 const int maxAutotuningCandidates,
265 const int numThreads, const int *threadIds = nullptr, const bool useRowMajor = false);
266}
267
268extern "C"
269{
/**
 * C-linkage tensor transposition entry point for float data.
 *
 * Returns nothing and takes no plan object. NOTE(review): presumably it
 * performs the whole transposition in one call and writes the result to B —
 * confirm against the implementation. Parameter meanings below are inferred
 * from their names only.
 *
 * @param perm, dim    presumably the index permutation and the number of dimensions
 * @param alpha, A     presumably the input scaling factor and the input tensor
 * @param sizeA        presumably the extent of each dimension of A
 * @param outerSizeA   presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B      presumably the output scaling factor and the output tensor
 * @param outerSizeB   presumably the extents of the allocation enclosing B — TODO confirm
 * @param numThreads   presumably the number of threads to use
 * @param useRowMajor  int flag, defaults to 0; the default argument is a C++
 *                     feature, so a C caller would have to pass it explicitly
 */
297void sTensorTranspose( const int *perm, const int dim,
298 const float alpha, const float *A, const int *sizeA, const int *outerSizeA,
299 const float beta, float *B, const int *outerSizeB,
300 const int numThreads, const int useRowMajor = 0);
301
/**
 * C-linkage tensor transposition entry point for double data.
 *
 * Returns nothing and takes no plan object. NOTE(review): presumably it
 * performs the whole transposition in one call and writes the result to B —
 * confirm against the implementation. Parameter meanings below are inferred
 * from their names only.
 *
 * @param perm, dim    presumably the index permutation and the number of dimensions
 * @param alpha, A     presumably the input scaling factor and the input tensor
 * @param sizeA        presumably the extent of each dimension of A
 * @param outerSizeA   presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B      presumably the output scaling factor and the output tensor
 * @param outerSizeB   presumably the extents of the allocation enclosing B — TODO confirm
 * @param numThreads   presumably the number of threads to use
 * @param useRowMajor  int flag, defaults to 0; the default argument is a C++
 *                     feature, so a C caller would have to pass it explicitly
 */
302void dTensorTranspose( const int *perm, const int dim,
303 const double alpha, const double *A, const int *sizeA, const int *outerSizeA,
304 const double beta, double *B, const int *outerSizeB,
305 const int numThreads, const int useRowMajor = 0);
306
/**
 * C-linkage tensor transposition entry point for single-precision complex
 * data, spelled with the C99 `float _Complex` type (accepted in C++ only as a
 * compiler extension).
 *
 * Returns nothing and takes no plan object. NOTE(review): presumably it
 * performs the whole transposition in one call and writes the result to B —
 * confirm against the implementation. Parameter meanings below are inferred
 * from their names only.
 *
 * @param perm, dim    presumably the index permutation and the number of dimensions
 * @param alpha        presumably the input scaling factor
 * @param conjA        presumably requests complex conjugation of A — TODO confirm
 * @param A            presumably the input tensor
 * @param sizeA        presumably the extent of each dimension of A
 * @param outerSizeA   presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B      presumably the output scaling factor and the output tensor
 * @param outerSizeB   presumably the extents of the allocation enclosing B — TODO confirm
 * @param numThreads   presumably the number of threads to use
 * @param useRowMajor  int flag, defaults to 0; the default argument is a C++
 *                     feature, so a C caller would have to pass it explicitly
 */
307void cTensorTranspose( const int *perm, const int dim,
308 const float _Complex alpha, bool conjA, const float _Complex *A, const int *sizeA, const int *outerSizeA,
309 const float _Complex beta, float _Complex *B, const int *outerSizeB,
310 const int numThreads, const int useRowMajor = 0);
311
/**
 * C-linkage tensor transposition entry point for double-precision complex
 * data, spelled with the C99 `double _Complex` type (accepted in C++ only as a
 * compiler extension).
 *
 * Returns nothing and takes no plan object. NOTE(review): presumably it
 * performs the whole transposition in one call and writes the result to B —
 * confirm against the implementation. Parameter meanings below are inferred
 * from their names only.
 *
 * @param perm, dim    presumably the index permutation and the number of dimensions
 * @param alpha        presumably the input scaling factor
 * @param conjA        presumably requests complex conjugation of A — TODO confirm
 * @param A            presumably the input tensor
 * @param sizeA        presumably the extent of each dimension of A
 * @param outerSizeA   presumably the extents of the allocation enclosing A — TODO confirm
 * @param beta, B      presumably the output scaling factor and the output tensor
 * @param outerSizeB   presumably the extents of the allocation enclosing B — TODO confirm
 * @param numThreads   presumably the number of threads to use
 * @param useRowMajor  int flag, defaults to 0; the default argument is a C++
 *                     feature, so a C caller would have to pass it explicitly
 */
312void zTensorTranspose( const int *perm, const int dim,
313 const double _Complex alpha, bool conjA, const double _Complex *A, const int *sizeA, const int *outerSizeA,
314 const double _Complex beta, double _Complex *B, const int *outerSizeB,
315 const int numThreads, const int useRowMajor = 0);
316}
Definition: compute_node.h:3
SelectionMethod
Determines the duration of the auto-tuning process.
Definition: hptt_types.h:22
std::shared_ptr< hptt::Transpose< float > > create_plan(const int *perm, const int dim, const float alpha, const float *A, const int *sizeA, const int *outerSizeA, const float beta, float *B, const int *outerSizeB, const SelectionMethod selectionMethod, const int numThreads, const int *threadIds=nullptr, const bool useRowMajor=false)
Creates a Tensor Transposition plan.