
regression.hxx

1 /************************************************************************/
2 /* */
3 /* Copyright 2008-2013 by Ullrich Koethe */
4 /* */
5 /* This file is part of the VIGRA computer vision library. */
6 /* The VIGRA Website is */
7 /* http://hci.iwr.uni-heidelberg.de/vigra/ */
8 /* Please direct questions, bug reports, and contributions to */
9 /* ullrich.koethe@iwr.uni-heidelberg.de or */
10 /* vigra@informatik.uni-hamburg.de */
11 /* */
12 /* Permission is hereby granted, free of charge, to any person */
13 /* obtaining a copy of this software and associated documentation */
14 /* files (the "Software"), to deal in the Software without */
15 /* restriction, including without limitation the rights to use, */
16 /* copy, modify, merge, publish, distribute, sublicense, and/or */
17 /* sell copies of the Software, and to permit persons to whom the */
18 /* Software is furnished to do so, subject to the following */
19 /* conditions: */
20 /* */
21 /* The above copyright notice and this permission notice shall be */
22 /* included in all copies or substantial portions of the */
23 /* Software. */
24 /* */
25 /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND */
26 /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */
27 /* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */
28 /* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */
29 /* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */
30 /* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */
31 /* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */
32 /* OTHER DEALINGS IN THE SOFTWARE. */
33 /* */
34 /************************************************************************/
35 
36 
37 #ifndef VIGRA_REGRESSION_HXX
38 #define VIGRA_REGRESSION_HXX
39 
40 #include "matrix.hxx"
41 #include "linear_solve.hxx"
42 #include "singular_value_decomposition.hxx"
43 #include "numerictraits.hxx"
44 #include "functorexpression.hxx"
45 #include "autodiff.hxx"
46 
47 
48 namespace vigra
49 {
50 
51 namespace linalg
52 {
53 
54 /** \addtogroup Optimization Optimization and Regression
55  */
56 //@{
57  /** Ordinary Least Squares Regression.
58 
59  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
60  and a column vector \a b of length <tt>m</tt>, this function computes
61  the column vector \a x of length <tt>n</tt> that solves the optimization problem
62 
63  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
64  \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
65  \f]
66 
67  When \a b is a matrix with <tt>k</tt> columns, \a x must also have
68  <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
69  \a b. Note that all matrices must already have the correct shape.
70 
71  This function is just another name for \ref linearSolve(), perhaps
72  leading to more readable code when \a A is a rectangular matrix. It returns
73  <tt>false</tt> when the rank of \a A is less than <tt>n</tt>.
74  See \ref linearSolve() for more documentation.
75 
76  <b>\#include</b> <vigra/regression.hxx><br/>
77  Namespaces: vigra and vigra::linalg
78  */
79 template <class T, class C1, class C2, class C3>
80 inline bool
81 leastSquares(MultiArrayView<2, T, C1> const & A,
82  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x,
83  std::string method = "QR")
84 {
85  return linearSolve(A, b, x, method);
86 }
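For illustration, a minimal call sketch (the shapes and data are placeholders; \a x must be pre-allocated with shape n x 1):

\code
#include <vigra/regression.hxx>
#include <iostream>

using namespace vigra;
using namespace vigra::linalg;

Matrix<double> A(100, 3), b(100, 1), x(3, 1); // placeholder shapes
// ... fill A and b ...
if(!leastSquares(A, b, x))                    // equivalent to linearSolve(A, b, x, "QR")
    std::cerr << "A is rank deficient\n";
\endcode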
87 
88  /** Weighted Least Squares Regression.
89 
90  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
91  a vector \a b of length <tt>m</tt>, and a weight vector \a weights of length <tt>m</tt>
92  with non-negative entries, this function computes the vector \a x of length <tt>n</tt>
93  that solves the optimization problem
94 
95  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
96  \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
97  \textrm{diag}(\textrm{\bf weights})
98  \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)
99  \f]
100 
101  where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
102  The algorithm calls \ref leastSquares() on the equivalent problem
103 
104  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
105  \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
106  \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2
107  \f]
108 
109  where the square root of \a weights is just taken element-wise.
110 
111  When \a b is a matrix with <tt>k</tt> columns, \a x must also have
112  <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
113  \a b. Note that all matrices must already have the correct shape.
114 
115  The function returns
116  <tt>false</tt> when the rank of the weighted matrix \a A is less than <tt>n</tt>.
117 
118  <b>\#include</b> <vigra/regression.hxx><br/>
119  Namespaces: vigra and vigra::linalg
120  */
121 template <class T, class C1, class C2, class C3, class C4>
122 bool
123 weightedLeastSquares(MultiArrayView<2, T, C1> const & A,
124  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> const &weights,
125  MultiArrayView<2, T, C4> &x, std::string method = "QR")
126 {
127  const unsigned int rows = rowCount(A);
128  const unsigned int cols = columnCount(A);
129  const unsigned int rhsCount = columnCount(b);
130  vigra_precondition(rows >= cols,
131  "weightedLeastSquares(): Input matrix A must be rectangular with rowCount >= columnCount.");
132  vigra_precondition(rowCount(b) == rows,
133  "weightedLeastSquares(): Shape mismatch between matrices A and b.");
134  vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
135  "weightedLeastSquares(): Weight matrix has wrong shape.");
136  vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
137  "weightedLeastSquares(): Result matrix x has wrong shape.");
138 
139  Matrix<T> wa(A.shape()), wb(b.shape());
140 
141  for(unsigned int k=0; k<rows; ++k)
142  {
143  vigra_precondition(weights(k,0) >= 0,
144  "weightedLeastSquares(): Weights must be non-negative.");
145  T w = std::sqrt(weights(k,0));
146  for(unsigned int l=0; l<cols; ++l)
147  wa(k,l) = w * A(k,l);
148  for(unsigned int l=0; l<rhsCount; ++l)
149  wb(k,l) = w * b(k,l);
150  }
151 
152  return leastSquares(wa, wb, x, method);
153 }
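A hedged usage sketch (the weight values are hypothetical; each weight scales the influence of the corresponding row of \a A and \a b):

\code
Matrix<double> A(100, 3), b(100, 1), x(3, 1), weights(100, 1);
weights.init(1.0);      // uniform weights reproduce ordinary leastSquares()
weights(0, 0) = 10.0;   // e.g. give the first observation more influence
// ... fill A and b ...
bool fullRank = weightedLeastSquares(A, b, weights, x);
\endcode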
154 
155  /** Ridge Regression.
156 
157  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
158  a vector \a b of length <tt>m</tt>, and a regularization parameter <tt>lambda >= 0.0</tt>,
159  this function computes the vector \a x of length <tt>n</tt>
160  that solves the optimization problem
161 
162  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
163  \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2 +
164  \lambda \textrm{\bf x}^T\textrm{\bf x}
165  \f]
166 
167  This is implemented by means of \ref singularValueDecomposition().
168 
169  When \a b is a matrix with <tt>k</tt> columns, \a x must also have
170  <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
171  \a b. Note that all matrices must already have the correct shape.
172 
173  The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
174  and <tt>lambda == 0.0</tt>.
175 
176  <b>\#include</b> <vigra/regression.hxx><br/>
177  Namespaces: vigra and vigra::linalg
178  */
179 template <class T, class C1, class C2, class C3>
180 bool
181 ridgeRegression(MultiArrayView<2, T, C1> const & A,
182  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x, double lambda)
183 {
184  const unsigned int rows = rowCount(A);
185  const unsigned int cols = columnCount(A);
186  const unsigned int rhsCount = columnCount(b);
187  vigra_precondition(rows >= cols,
188  "ridgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
189  vigra_precondition(rowCount(b) == rows,
190  "ridgeRegression(): Shape mismatch between matrices A and b.");
191  vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
192  "ridgeRegression(): Result matrix x has wrong shape.");
193  vigra_precondition(lambda >= 0.0,
194  "ridgeRegression(): lambda >= 0.0 required.");
195 
196  unsigned int m = rows;
197  unsigned int n = cols;
198 
199  Matrix<T> u(m, n), s(n, 1), v(n, n);
200 
201  unsigned int rank = singularValueDecomposition(A, u, s, v);
202  if(rank < n && lambda == 0.0)
203  return false;
204 
205  Matrix<T> t = transpose(u)*b;
206  for(unsigned int k=0; k<cols; ++k)
207  for(unsigned int l=0; l<rhsCount; ++l)
208  t(k,l) *= s(k,0) / (sq(s(k,0)) + lambda);
209  x = v*t;
210  return true;
211 }
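A minimal sketch with placeholder shapes; larger values of lambda shrink \a x more strongly towards zero:

\code
Matrix<double> A(100, 3), b(100, 1), x(3, 1);
// ... fill A and b ...
double lambda = 0.5;                   // regularization strength
if(!ridgeRegression(A, b, x, lambda))  // can only fail when lambda == 0.0
    std::cerr << "A is rank deficient\n";
\endcode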
212 
213  /** Weighted ridge Regression.
214 
215  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
216  a vector \a b of length <tt>m</tt>, a weight vector \a weights of length <tt>m</tt>
217  with non-negative entries, and a regularization parameter <tt>lambda >= 0.0</tt>
218  this function computes the vector \a x of length <tt>n</tt>
219  that solves the optimization problem
220 
221  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
222  \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right)^T
223  \textrm{diag}(\textrm{\bf weights})
224  \left(\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right) +
225  \lambda \textrm{\bf x}^T\textrm{\bf x}
226  \f]
227 
228  where <tt>diag(weights)</tt> creates a diagonal matrix from \a weights.
229  The algorithm calls \ref ridgeRegression() on the equivalent problem
230 
231  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
232  \left|\left|\textrm{diag}(\textrm{\bf weights})^{1/2}\textrm{\bf A} \textrm{\bf x} -
233  \textrm{diag}(\textrm{\bf weights})^{1/2} \textrm{\bf b}\right|\right|_2^2 +
234  \lambda \textrm{\bf x}^T\textrm{\bf x}
235  \f]
236 
237  where the square root of \a weights is just taken element-wise. This solution is
238  computed by means of \ref singularValueDecomposition().
239 
240  When \a b is a matrix with <tt>k</tt> columns, \a x must also have
241  <tt>k</tt> columns, which will contain the solutions for the corresponding columns of
242  \a b. Note that all matrices must already have the correct shape.
243 
244  The function returns <tt>false</tt> if the rank of \a A is less than <tt>n</tt>
245  and <tt>lambda == 0.0</tt>.
246 
247  <b>\#include</b> <vigra/regression.hxx><br/>
248  Namespaces: vigra and vigra::linalg
249  */
250 template <class T, class C1, class C2, class C3, class C4>
251 bool
252 weightedRidgeRegression(MultiArrayView<2, T, C1> const & A,
253  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> const &weights,
254  MultiArrayView<2, T, C4> &x, double lambda)
255 {
256  const unsigned int rows = rowCount(A);
257  const unsigned int cols = columnCount(A);
258  const unsigned int rhsCount = columnCount(b);
259  vigra_precondition(rows >= cols,
260  "weightedRidgeRegression(): Input matrix A must be rectangular with rowCount >= columnCount.");
261  vigra_precondition(rowCount(b) == rows,
262  "weightedRidgeRegression(): Shape mismatch between matrices A and b.");
263  vigra_precondition(rowCount(b) == rowCount(weights) && columnCount(weights) == 1,
264  "weightedRidgeRegression(): Weight matrix has wrong shape.");
265  vigra_precondition(rowCount(x) == cols && columnCount(x) == rhsCount,
266  "weightedRidgeRegression(): Result matrix x has wrong shape.");
267  vigra_precondition(lambda >= 0.0,
268  "weightedRidgeRegression(): lambda >= 0.0 required.");
269 
270  Matrix<T> wa(A.shape()), wb(b.shape());
271 
272  for(unsigned int k=0; k<rows; ++k)
273  {
274  vigra_precondition(weights(k,0) >= 0,
275  "weightedRidgeRegression(): Weights must be non-negative.");
276  T w = std::sqrt(weights(k,0));
277  for(unsigned int l=0; l<cols; ++l)
278  wa(k,l) = w * A(k,l);
279  for(unsigned int l=0; l<rhsCount; ++l)
280  wb(k,l) = w * b(k,l);
281  }
282 
283  return ridgeRegression(wa, wb, x, lambda);
284 }
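A minimal sketch combining both mechanisms (shapes and values are placeholders):

\code
Matrix<double> A(100, 3), b(100, 1), x(3, 1), weights(100, 1);
weights.init(1.0);
// ... fill A, b, and weights ...
weightedRidgeRegression(A, b, weights, x, 0.5);  // lambda = 0.5
\endcode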
285 
286  /** Ridge Regression with many lambdas.
287 
288  This executes \ref ridgeRegression() for a sequence of regularization parameters. This
289  is implemented so that the \ref singularValueDecomposition() has to be executed only once.
290  \a lambda must be an array conforming to the <tt>std::vector</tt> interface, i.e. must
291  support <tt>lambda.size()</tt> and <tt>lambda[k]</tt>. The columns of the matrix \a x
292  will contain the solutions for the corresponding lambda, so the number of columns of
293  the matrix \a x must be equal to <tt>lambda.size()</tt>, and \a b must be a column vector,
294  i.e. cannot contain several right hand sides at once.
295 
296  The function returns <tt>false</tt> when the matrix \a A is rank deficient. If this
297  happens, and one of the lambdas is zero, the corresponding column of \a x will be skipped.
298 
299  <b>\#include</b> <vigra/regression.hxx><br/>
300  Namespaces: vigra and vigra::linalg
301  */
302 template <class T, class C1, class C2, class C3, class Array>
303 bool
304 ridgeRegressionSeries(MultiArrayView<2, T, C1> const & A,
305  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x, Array const & lambda)
306 {
307  const unsigned int rows = rowCount(A);
308  const unsigned int cols = columnCount(A);
309  const unsigned int lambdaCount = lambda.size();
310  vigra_precondition(rows >= cols,
311  "ridgeRegressionSeries(): Input matrix A must be rectangular with rowCount >= columnCount.");
312  vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
313  "ridgeRegressionSeries(): Shape mismatch between matrices A and b.");
314  vigra_precondition(rowCount(x) == cols && columnCount(x) == lambdaCount,
315  "ridgeRegressionSeries(): Result matrix x has wrong shape.");
316 
317  unsigned int m = rows;
318  unsigned int n = cols;
319 
320  Matrix<T> u(m, n), s(n, 1), v(n, n);
321 
322  unsigned int rank = singularValueDecomposition(A, u, s, v);
323 
324  Matrix<T> xl = transpose(u)*b;
325  Matrix<T> xt(cols,1);
326  for(unsigned int i=0; i<lambdaCount; ++i)
327  {
328  vigra_precondition(lambda[i] >= 0.0,
329  "ridgeRegressionSeries(): lambda >= 0.0 required.");
330  if(lambda[i] == 0.0 && rank < n) // A rank deficient: skip, no unique solution for this lambda
331  continue;
332  for(unsigned int k=0; k<cols; ++k)
333  xt(k,0) = xl(k,0) * s(k,0) / (sq(s(k,0)) + lambda[i]);
334  columnVector(x, i) = v*xt;
335  }
336  return (rank == n);
337 }
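A minimal sketch; the lambda container and shapes are placeholders:

\code
Matrix<double> A(100, 3), b(100, 1);
// ... fill A and b ...
ArrayVector<double> lambdas;           // any std::vector-like container works
lambdas.push_back(0.01);
lambdas.push_back(0.1);
lambdas.push_back(1.0);
Matrix<double> x(3, lambdas.size());   // one solution column per lambda
bool fullRank = ridgeRegressionSeries(A, b, x, lambdas);
\endcode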
338 
339 /** \brief Pass options to leastAngleRegression().
340 
341  <b>\#include</b> <vigra/regression.hxx><br/>
342  Namespaces: vigra and vigra::linalg
343 */
344 class LeastAngleRegressionOptions
345 {
346  public:
347  enum Mode { LARS, LASSO, NNLASSO };
348 
349  /** Initialize all options with default values.
350  */
351  LeastAngleRegressionOptions()
352  : max_solution_count(0),
353  unconstrained_dimension_count(0),
354  mode(LASSO),
355  least_squares_solutions(true)
356  {}
357 
358  /** Maximum number of solutions to be computed.
359 
360  If \a n is 0 (the default), the number of solutions is determined by the length
361  of the solution array. Otherwise, the minimum of maxSolutionCount() and that
362  length is taken.<br>
363  Default: 0 (use length of solution array)
364  */
365  LeastAngleRegressionOptions & maxSolutionCount(unsigned int n)
366  {
367  max_solution_count = (int)n;
368  return *this;
369  }
370 
371  /** Set the mode of the algorithm.
372 
373  Mode must be one of "lars", "lasso", "nnlasso". The function just calls
374  the member function of the corresponding name to set the mode.
375 
376  Default: "lasso"
377  */
378  LeastAngleRegressionOptions & setMode(std::string mode)
379  {
380  mode = tolower(mode);
381  if(mode == "lars")
382  this->lars();
383  else if(mode == "lasso")
384  this->lasso();
385  else if(mode == "nnlasso")
386  this->nnlasso();
387  else
388  vigra_fail("LeastAngleRegressionOptions.setMode(): Invalid mode.");
389  return *this;
390  }
391 
392 
393  /** Use the plain LARS algorithm.
394 
395  Default: inactive
396  */
397  LeastAngleRegressionOptions & lars()
398  {
399  mode = LARS;
400  return *this;
401  }
402 
403  /** Use the LASSO modification of the LARS algorithm.
404 
405  This allows features to be removed from the active set under certain conditions.<br>
406  Default: active
407  */
408  LeastAngleRegressionOptions & lasso()
409  {
410  mode = LASSO;
411  return *this;
412  }
413 
414  /** Use the non-negative LASSO modification of the LARS algorithm.
415 
416  This enforces all non-zero entries in the solution to be positive.<br>
417  Default: inactive
418  */
419  LeastAngleRegressionOptions & nnlasso()
420  {
421  mode = NNLASSO;
422  return *this;
423  }
424 
425  /** Compute least squares solutions.
426 
427  Use least angle regression to determine active sets, but
428  return least squares solutions for the features in each active set,
429  instead of constrained solutions.<br>
430  Default: <tt>true</tt>
431  */
432  LeastAngleRegressionOptions & leastSquaresSolutions(bool select = true)
433  {
434  least_squares_solutions = select;
435  return *this;
436  }
437 
438  int max_solution_count, unconstrained_dimension_count;
439  Mode mode;
440  bool least_squares_solutions;
441 };
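Because every setter returns a reference to the options object, settings can be chained; a small sketch:

\code
LeastAngleRegressionOptions options;
options.setMode("nnlasso")           // same effect as options.nnlasso()
       .maxSolutionCount(10)         // compute at most 10 knots of the path
       .leastSquaresSolutions(false);
\endcode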
442 
443 namespace detail {
444 
445 template <class T, class C1, class C2>
446 struct LarsData
447 {
448  typedef typename MultiArrayShape<2>::type Shape;
449 
450  int activeSetSize;
451  MultiArrayView<2, T, C1> A;
452  MultiArrayView<2, T, C2> b;
453  Matrix<T> R, qtb, lars_solution, lars_prediction, next_lsq_solution, next_lsq_prediction, searchVector;
454  ArrayVector<MultiArrayIndex> columnPermutation;
455 
456  // init data for a new run
457  LarsData(MultiArrayView<2, T, C1> const & Ai, MultiArrayView<2, T, C2> const & bi)
458  : activeSetSize(1),
459  A(Ai), b(bi), R(A), qtb(b),
460  lars_solution(A.shape(1), 1), lars_prediction(A.shape(0), 1),
461  next_lsq_solution(A.shape(1), 1), next_lsq_prediction(A.shape(0), 1), searchVector(A.shape(0), 1),
462  columnPermutation(A.shape(1))
463  {
464  for(unsigned int k=0; k<columnPermutation.size(); ++k)
465  columnPermutation[k] = k;
466  }
467 
468  // copy data for the recursive call in nnlassolsq
469  LarsData(LarsData const & d, int asetSize)
470  : activeSetSize(asetSize),
471  A(d.R.subarray(Shape(0,0), Shape(d.A.shape(0), activeSetSize))), b(d.qtb), R(A), qtb(b),
472  lars_solution(d.lars_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))), lars_prediction(d.lars_prediction),
473  next_lsq_solution(d.next_lsq_solution.subarray(Shape(0,0), Shape(activeSetSize, 1))),
474  next_lsq_prediction(d.next_lsq_prediction), searchVector(d.searchVector),
475  columnPermutation(A.shape(1))
476  {
477  for(unsigned int k=0; k<columnPermutation.size(); ++k)
478  columnPermutation[k] = k;
479  }
480 };
481 
482 template <class T, class C1, class C2, class Array1, class Array2, class Array3>
483 unsigned int
484 leastAngleRegressionMainLoop(LarsData<T, C1, C2> & d,
485  Array1 & activeSets,
486  Array2 * lars_solutions, Array3 * lsq_solutions,
487  LeastAngleRegressionOptions const & options)
488 {
489  using namespace vigra::functor;
490 
491  typedef typename MultiArrayShape<2>::type Shape;
492  typedef typename Matrix<T>::view_type Subarray;
493  typedef ArrayVector<MultiArrayIndex> Permutation;
494  typedef typename Permutation::view_type ColumnSet;
495 
496  vigra_precondition(d.activeSetSize > 0,
497  "leastAngleRegressionMainLoop() must not be called with empty active set.");
498 
499  bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);
500  bool lasso_modification = (options.mode != LeastAngleRegressionOptions::LARS);
501 
502  const MultiArrayIndex rows = rowCount(d.R);
503  const MultiArrayIndex cols = columnCount(d.R);
504  const MultiArrayIndex maxRank = std::min(rows, cols);
505 
506  MultiArrayIndex maxSolutionCount = options.max_solution_count;
507  if(maxSolutionCount == 0)
508  maxSolutionCount = lasso_modification
509  ? 10*maxRank
510  : maxRank;
511 
512  bool needToRemoveColumn = false;
513  MultiArrayIndex columnToBeAdded = 0, columnToBeRemoved = 0;
514  MultiArrayIndex currentSolutionCount = 0;
515  while(currentSolutionCount < maxSolutionCount)
516  {
517  //ColumnSet activeSet = d.columnPermutation.subarray(0, (unsigned int)d.activeSetSize);
518  ColumnSet inactiveSet = d.columnPermutation.subarray((unsigned int)d.activeSetSize, (unsigned int)cols);
519 
520  // find next dimension to be activated
521  Matrix<T> cLARS = transpose(d.A) * (d.b - d.lars_prediction), // correlation with LARS residual
522  cLSQ = transpose(d.A) * (d.b - d.next_lsq_prediction); // correlation with LSQ residual
523 
524  // In theory, all vectors in the active set should have the same correlation C, and
525  // the correlation of all others should not exceed this. In practice, we may find the
526  // maximum correlation in any variable due to tiny numerical inaccuracies. Therefore, we
527  // determine C from the entire set of variables.
528  MultiArrayIndex cmaxIndex = enforce_positive
529  ? argMax(cLARS)
530  : argMax(abs(cLARS));
531  T C = abs(cLARS(cmaxIndex, 0));
532 
533  Matrix<T> ac(cols - d.activeSetSize, 1);
534  for(MultiArrayIndex k = 0; k<cols-d.activeSetSize; ++k)
535  {
536  T rho = cLSQ(inactiveSet[k], 0),
537  cc = C - sign(rho)*cLARS(inactiveSet[k], 0);
538 
539  if(rho == 0.0) // make sure that 0/0 cannot happen in the other cases
540  ac(k,0) = 1.0; // variable k is linearly dependent on the active set
541  else if(rho > 0.0)
542  ac(k,0) = cc / (cc + rho); // variable k would enter the active set with positive sign
543  else if(enforce_positive)
544  ac(k,0) = 1.0; // variable k cannot enter the active set because it would be negative
545  else
546  ac(k,0) = cc / (cc - rho); // variable k would enter the active set with negative sign
547  }
548 
549  // in the non-negative case: make sure that a column just removed cannot re-enter right away
550  // (in standard LASSO, this is allowed, because the variable may re-enter with opposite sign)
551  if(enforce_positive && needToRemoveColumn)
552  ac(columnToBeRemoved-d.activeSetSize,0) = 1.0;
553 
554  // find candidate
555  // Note: the R implementation uses Arg1() > epsilon, but this is only possible because it allows several variables to
556  // join the active set simultaneously, so that gamma = 0 cannot occur.
557  columnToBeAdded = argMin(ac);
558 
559  // if no new column can be added, we do a full step gamma = 1.0 and then stop, unless a column is removed below
560  T gamma = (d.activeSetSize == maxRank)
561  ? 1.0
562  : ac(columnToBeAdded, 0);
563 
564  // adjust columnToBeAdded: we skipped the active set
565  if(columnToBeAdded >= 0)
566  columnToBeAdded += d.activeSetSize;
567 
568  // check whether we have to remove a column from the active set
569  needToRemoveColumn = false;
570  if(lasso_modification)
571  {
572  // find dimensions whose weight changes sign below gamma*searchDirection
573  Matrix<T> s(Shape(d.activeSetSize, 1), NumericTraits<T>::max());
574  for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
575  {
576  if(( enforce_positive && d.next_lsq_solution(k,0) < 0.0) ||
577  (!enforce_positive && sign(d.lars_solution(k,0))*sign(d.next_lsq_solution(k,0)) == -1.0))
578  s(k,0) = d.lars_solution(k,0) / (d.lars_solution(k,0) - d.next_lsq_solution(k,0));
579  }
580 
581  columnToBeRemoved = argMinIf(s, Arg1() <= Param(gamma));
582  if(columnToBeRemoved >= 0)
583  {
584  needToRemoveColumn = true; // remove takes precedence over add
585  gamma = s(columnToBeRemoved, 0);
586  }
587  }
588 
589  // compute the current solutions
590  d.lars_prediction = gamma * d.next_lsq_prediction + (1.0 - gamma) * d.lars_prediction;
591  d.lars_solution = gamma * d.next_lsq_solution + (1.0 - gamma) * d.lars_solution;
592  if(needToRemoveColumn)
593  d.lars_solution(columnToBeRemoved, 0) = 0.0; // turn possible epsilon into an exact zero
594 
595  // write the current solution
596  ++currentSolutionCount;
597  activeSets.push_back(typename Array1::value_type(d.columnPermutation.begin(), d.columnPermutation.begin()+d.activeSetSize));
598 
599  if(lsq_solutions != 0)
600  {
601  if(enforce_positive)
602  {
603  ArrayVector<Matrix<T> > nnresults;
604  ArrayVector<ArrayVector<MultiArrayIndex> > nnactiveSets;
605  LarsData<T, C1, C2> nnd(d, d.activeSetSize);
606 
607  leastAngleRegressionMainLoop(nnd, nnactiveSets, &nnresults, (Array3*)0,
608  LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
609  //Matrix<T> nnlsq_solution(d.activeSetSize, 1);
610  typename Array2::value_type nnlsq_solution(Shape(d.activeSetSize, 1));
611  for(unsigned int k=0; k<nnactiveSets.back().size(); ++k)
612  {
613  nnlsq_solution(nnactiveSets.back()[k],0) = nnresults.back()[k];
614  }
615  //lsq_solutions->push_back(nnlsq_solution);
616  lsq_solutions->push_back(typename Array3::value_type());
617  lsq_solutions->back() = nnlsq_solution;
618  }
619  else
620  {
621  //lsq_solutions->push_back(d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
622  lsq_solutions->push_back(typename Array3::value_type());
623  lsq_solutions->back() = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
624  }
625  }
626  if(lars_solutions != 0)
627  {
628  //lars_solutions->push_back(d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1)));
629  lars_solutions->push_back(typename Array2::value_type());
630  lars_solutions->back() = d.lars_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
631  }
632 
633  // no further solutions possible
634  if(gamma == 1.0)
635  break;
636 
637  if(needToRemoveColumn)
638  {
639  --d.activeSetSize;
640  if(columnToBeRemoved != d.activeSetSize)
641  {
642  // remove column 'columnToBeRemoved' and restore triangular form of R
643  // note: columnPermutation is automatically swapped here
644  detail::upperTriangularSwapColumns(columnToBeRemoved, d.activeSetSize, d.R, d.qtb, d.columnPermutation);
645 
646  // swap solution entries
647  std::swap(d.lars_solution(columnToBeRemoved, 0), d.lars_solution(d.activeSetSize,0));
648  std::swap(d.next_lsq_solution(columnToBeRemoved, 0), d.next_lsq_solution(d.activeSetSize,0));
649  columnToBeRemoved = d.activeSetSize; // keep track of removed column
650  }
651  d.lars_solution(d.activeSetSize,0) = 0.0;
652  d.next_lsq_solution(d.activeSetSize,0) = 0.0;
653  }
654  else
655  {
656  vigra_invariant(columnToBeAdded >= 0,
657  "leastAngleRegression(): internal error (columnToBeAdded < 0)");
658  // add column 'columnToBeAdded'
659  if(d.activeSetSize != columnToBeAdded)
660  {
661  std::swap(d.columnPermutation[d.activeSetSize], d.columnPermutation[columnToBeAdded]);
662  columnVector(d.R, d.activeSetSize).swapData(columnVector(d.R, columnToBeAdded));
663  columnToBeAdded = d.activeSetSize; // keep track of added column
664  }
665 
666  // zero the corresponding entries of the solutions
667  d.next_lsq_solution(d.activeSetSize,0) = 0.0;
668  d.lars_solution(d.activeSetSize,0) = 0.0;
669 
670  // reduce R (i.e. its newly added column) to triangular form
671  detail::qrColumnHouseholderStep(d.activeSetSize, d.R, d.qtb);
672  ++d.activeSetSize;
673  }
674 
675  // compute the LSQ solution of the new active set
676  Subarray Ractive = d.R.subarray(Shape(0,0), Shape(d.activeSetSize, d.activeSetSize));
677  Subarray qtbactive = d.qtb.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
678  Subarray next_lsq_solution_view = d.next_lsq_solution.subarray(Shape(0,0), Shape(d.activeSetSize, 1));
679  linearSolveUpperTriangular(Ractive, qtbactive, next_lsq_solution_view);
680 
681  // compute the LSQ prediction of the new active set
682  d.next_lsq_prediction.init(0.0);
683  for(MultiArrayIndex k=0; k<d.activeSetSize; ++k)
684  d.next_lsq_prediction += next_lsq_solution_view(k,0)*columnVector(d.A, d.columnPermutation[k]);
685  }
686 
687  return (unsigned int)currentSolutionCount;
688 }
689 
690 template <class T, class C1, class C2, class Array1, class Array2>
691 unsigned int
692 leastAngleRegressionImpl(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
693  Array1 & activeSets, Array2 * lasso_solutions, Array2 * lsq_solutions,
694  LeastAngleRegressionOptions const & options)
695 {
696  using namespace vigra::functor;
697 
698  const MultiArrayIndex rows = rowCount(A);
699 
700  vigra_precondition(rowCount(b) == rows && columnCount(b) == 1,
701  "leastAngleRegression(): Shape mismatch between matrices A and b.");
702 
703  bool enforce_positive = (options.mode == LeastAngleRegressionOptions::NNLASSO);
704 
705  detail::LarsData<T, C1, C2> d(A, b);
706 
707  // find dimension with largest correlation
708  Matrix<T> c = transpose(A)*b;
709  MultiArrayIndex initialColumn = enforce_positive
710  ? argMaxIf(c, Arg1() > Param(0.0))
711  : argMax(abs(c));
712  if(initialColumn == -1)
713  return 0; // no solution found
714 
715  // prepare initial active set and search direction etc.
716  std::swap(d.columnPermutation[0], d.columnPermutation[initialColumn]);
717  columnVector(d.R, 0).swapData(columnVector(d.R, initialColumn));
718  detail::qrColumnHouseholderStep(0, d.R, d.qtb);
719  d.next_lsq_solution(0,0) = d.qtb(0,0) / d.R(0,0);
720  d.next_lsq_prediction = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);
721  d.searchVector = d.next_lsq_solution(0,0) * columnVector(A, d.columnPermutation[0]);
722 
723  return leastAngleRegressionMainLoop(d, activeSets, lasso_solutions, lsq_solutions, options);
724 }
725 
726 } // namespace detail
727 
728 /** Least Angle Regression.
729 
730  <b>\#include</b> <vigra/regression.hxx><br/>
731  Namespaces: vigra and vigra::linalg
732 
733  <b> Declarations:</b>
734 
735  \code
736  namespace vigra {
737  namespace linalg {
738  // compute either LASSO or least squares solutions
739  template <class T, class C1, class C2, class Array1, class Array2>
740  unsigned int
741  leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
742  Array1 & activeSets, Array2 & solutions,
743  LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());
744 
745  // compute LASSO and least squares solutions
746  template <class T, class C1, class C2, class Array1, class Array2>
747  unsigned int
748  leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
749  Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
750  LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions());
751  }
752  using linalg::leastAngleRegression;
753  }
754  \endcode
755 
756  This function implements Least Angle Regression (LARS) as described in
757 
758  &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
759  B. Efron, T. Hastie, I. Johnstone, and R. Tibshirani: <i>"Least Angle Regression"</i>,
760  Annals of Statistics 32(2):407-499, 2004.
761 
762  It is an efficient algorithm to solve the L1-regularized least squares (LASSO) problem
763 
764  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
765  \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
766  \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s
767  \f]
768 
769  and the L1-regularized non-negative least squares (NN-LASSO) problem
770 
771  \f[ \tilde \textrm{\bf x} = \textrm{argmin} \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
772  \textrm{ subject to } \left|\left|\textrm{\bf x}\right|\right|_1\le s \textrm{ and } \textrm{\bf x}\ge \textrm{\bf 0}
773  \f]
774 
775  where \a A is a matrix with <tt>m</tt> rows and <tt>n</tt> columns (often with <tt>m < n</tt>),
776  \a b a vector of length <tt>m</tt>, and a regularization parameter s >= 0.0.
777  L1-regularization has the desirable effect that it causes the solution <b>x</b> to be sparse, i.e. only
778  the most important elements in <b>x</b> (called the <em>active set</em>) have non-zero values. The
779  key insight of the LARS algorithm is the following: When the solution vector <b>x</b> is considered
780  as a function of the regularization parameter s, then <b>x</b>(s) is a piecewise
781  linear function, i.e. a polyline in n-dimensional space. The knots of the polyline <b>x</b>(s)
782  are located precisely at those values of s where one variable enters or leaves the active set
783  and can be efficiently computed.
784 
785  Therefore, leastAngleRegression() returns the entire solution path as a sequence of knot points, starting
786  at \f$\textrm{\bf x}(s=0)\f$ (where the only feasible solution is obviously <b>x</b> = 0) and ending at
787  \f$\textrm{\bf x}(s=\infty)\f$ (where the solution becomes the ordinary least squares solution). Actually,
788  the initial null solution is not explicitly returned, i.e. the sequence starts at the first non-zero
789  solution with one variable in the active set. The function leastAngleRegression() returns the number
790  of solutions (i.e. knot points) computed.
791 
792  The sequences of active sets and corresponding variable weights are returned in \a activeSets and
793  \a solutions respectively. That is, <tt>activeSets[i]</tt> is an \ref vigra::ArrayVector "ArrayVector<int>"
794  containing the indices of the variables that are active at the i-th knot, and <tt>solutions</tt> is a
795  \ref vigra::linalg::Matrix "Matrix<T>" containing the weights of those variables, in the same order (see
796  example below). Variables not contained in <tt>activeSets[i]</tt> are zero at this solution.
797 
798  The behavior of the algorithm can be adapted by \ref vigra::linalg::LeastAngleRegressionOptions
799  "LeastAngleRegressionOptions":
800  <DL>
801  <DT><b>options.lasso()</b> (active by default)
802  <DD> Compute the LASSO solution as described above.
803  <DT><b>options.nnlasso()</b> (inactive by default)
804  <DD> Compute non-negative LASSO solutions, i.e. use the additional constraint that
805  <b>x</b> >= 0 in all solutions.
806  <DT><b>options.lars()</b> (inactive by default)
807  <DD> Compute a solution path according to the plain LARS rule, i.e. never remove
808  a variable from the active set once it entered.
809  <DT><b>options.leastSquaresSolutions(bool)</b> (default: true)
810  <DD> Use the algorithm mode selected above
811  to determine the sequence of active sets, but then compute and return an
812  ordinary (unconstrained) least squares solution for every active set.<br>
813  <b>Note:</b> The second form of leastAngleRegression() ignores this option and
814  does always compute both constrained and unconstrained solutions (returned in
815  \a lasso_solutions and \a lsq_solutions respectively).
816  <DT><b>maxSolutionCount(unsigned int n)</b> (default: n = 0, i.e. compute all solutions)
817  <DD> Compute at most <tt>n</tt> solutions.
818  </DL>
819 
820  <b>Usage:</b>
821 
822  \code
823  int m = ..., n = ...;
824  Matrix<double> A(m, n), b(m, 1);
825  ... // fill A and b
826 
827  // normalize the input
828  Matrix<double> offset(1,n), scaling(1,n);
829  prepareColumns(A, A, offset, scaling, DataPreparationGoals(ZeroMean|UnitVariance));
830  prepareColumns(b, b, DataPreparationGoals(ZeroMean));
831 
832  // arrays to hold the output
833  ArrayVector<ArrayVector<int> > activeSets;
834  ArrayVector<Matrix<double> > solutions;
835 
836  // run leastAngleRegression() in non-negative LASSO mode
837  int numSolutions = leastAngleRegression(A, b, activeSets, solutions,
838  LeastAngleRegressionOptions().nnlasso());
839 
840  // print results
841  Matrix<double> denseSolution(1, n);
842  for (MultiArrayIndex k = 0; k < numSolutions; ++k)
843  {
844  // transform the sparse solution into a dense vector
845  denseSolution.init(0.0); // ensure that inactive variables are zero
846  for (unsigned int i = 0; i < activeSets[k].size(); ++i)
847  {
848  // set the values of the active variables;
849  // activeSets[k][i] is the true index of the i-th variable in the active set
850  denseSolution(0, activeSets[k][i]) = solutions[k](i,0);
851  }
852 
853  // invert the input normalization
854  denseSolution = denseSolution * pointWise(scaling);
855 
856  // output the solution
857  std::cout << "solution " << k << ":\n" << denseSolution << std::endl;
858  }
859  \endcode
860 
861  <b>Required Interface:</b>
862 
863  <ul>
864  <li> <tt>T</tt> must be a numeric type (compatible with double)
865  <li> <tt>Array1 a1;</tt><br>
866  <tt>a1.push_back(ArrayVector<int>());</tt>
867  <li> <tt>Array2 a2;</tt><br>
868  <tt>a2.push_back(Matrix<T>());</tt>
869  </ul>
870  */
871 doxygen_overloaded_function(template <...> unsigned int leastAngleRegression)
872 
873 template <class T, class C1, class C2, class Array1, class Array2>
874 inline unsigned int
875 leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
876  Array1 & activeSets, Array2 & solutions,
877  LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
878 {
879  if(options.least_squares_solutions)
880  return detail::leastAngleRegressionImpl(A, b, activeSets, (Array2*)0, &solutions, options);
881  else
882  return detail::leastAngleRegressionImpl(A, b, activeSets, &solutions, (Array2*)0, options);
883 }
884 
885 template <class T, class C1, class C2, class Array1, class Array2>
886 inline unsigned int
887 leastAngleRegression(MultiArrayView<2, T, C1> const & A, MultiArrayView<2, T, C2> const &b,
888  Array1 & activeSets, Array2 & lasso_solutions, Array2 & lsq_solutions,
889  LeastAngleRegressionOptions const & options = LeastAngleRegressionOptions())
890 {
891  return detail::leastAngleRegressionImpl(A, b, activeSets, &lasso_solutions, &lsq_solutions, options);
892 }
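A sketch of the second overload, which fills both solution arrays in lock-step (shapes are placeholders; see the detailed usage example in the documentation above):

\code
Matrix<double> A(100, 50), b(100, 1);
// ... fill and normalize A and b as in the usage example above ...
ArrayVector<ArrayVector<int> > activeSets;
ArrayVector<Matrix<double> > lassoSolutions, lsqSolutions;
unsigned int numSolutions = leastAngleRegression(A, b, activeSets,
                                                 lassoSolutions, lsqSolutions);
// lassoSolutions[k] and lsqSolutions[k] both refer to the variables in activeSets[k]
\endcode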
893 
894  /** Non-negative Least Squares Regression.
895 
896  Given a matrix \a A with <tt>m</tt> rows and <tt>n</tt> columns (with <tt>m >= n</tt>),
897  and a column vector \a b of length <tt>m</tt>, this function computes
898  a column vector \a x of length <tt>n</tt> with <b>non-negative entries</b> that solves the optimization problem
899 
900  \f[ \tilde \textrm{\bf x} = \textrm{argmin}
901  \left|\left|\textrm{\bf A} \textrm{\bf x} - \textrm{\bf b}\right|\right|_2^2
902  \textrm{ subject to } \textrm{\bf x} \ge \textrm{\bf 0}
903  \f]
904 
905  Both \a b and \a x must be column vectors (i.e. matrices with <tt>1</tt> column).
906  Note that all matrices must already have the correct shape. The solution is computed by means
907  of \ref leastAngleRegression() with non-negativity constraint.
908 
909  <b>\#include</b> <vigra/regression.hxx>
910  Namespaces: vigra and vigra::linalg
911  */
912 template <class T, class C1, class C2, class C3>
913 inline void
914 nonnegativeLeastSquares(MultiArrayView<2, T, C1> const & A,
915  MultiArrayView<2, T, C2> const &b, MultiArrayView<2, T, C3> &x)
916 {
917  vigra_precondition(columnCount(A) == rowCount(x) && rowCount(A) == rowCount(b),
918  "nonnegativeLeastSquares(): Matrix shape mismatch.");
919  vigra_precondition(columnCount(b) == 1 && columnCount(x) == 1,
920  "nonnegativeLeastSquares(): RHS and solution must be vectors (i.e. columnCount == 1).");
921 
922  ArrayVector<ArrayVector<MultiArrayIndex> > activeSets;
923  ArrayVector<Matrix<T> > results;
924 
925  leastAngleRegression(A, b, activeSets, results,
926  LeastAngleRegressionOptions().leastSquaresSolutions(false).nnlasso());
927  x.init(NumericTraits<T>::zero());
928  if(activeSets.size() > 0)
929  for(unsigned int k=0; k<activeSets.back().size(); ++k)
930  x(activeSets.back()[k],0) = results.back()[k];
931 }
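A minimal sketch with placeholder shapes:

\code
Matrix<double> A(100, 3), b(100, 1), x(3, 1);
// ... fill A and b ...
nonnegativeLeastSquares(A, b, x);   // on return, all entries of x are >= 0
\endcode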
932 
933 
934 //@}
935 
936 } // namespace linalg
937 
938 using linalg::leastSquares;
939 using linalg::weightedLeastSquares;
940 using linalg::ridgeRegression;
941 using linalg::weightedRidgeRegression;
942 using linalg::ridgeRegressionSeries;
943 using linalg::nonnegativeLeastSquares;
944 using linalg::leastAngleRegression;
945 using linalg::LeastAngleRegressionOptions;
946 
947 namespace detail {
948 
949 template <class T, class S>
950 inline T
951 getRow(MultiArrayView<1, T, S> const & a, MultiArrayIndex i)
952 {
953  return a(i);
954 }
955 
956 template <class T, class S>
957 inline MultiArrayView<1, T>
958 getRow(MultiArrayView<2, T, S> const & a, MultiArrayIndex i)
959 {
960  return a.bindInner(i);
961 }
962 
963 } // namespace detail
964 
965 /** \addtogroup Optimization
966  */
967 //@{
968 
969 /** \brief Pass options to nonlinearLeastSquares().
970 
971  <b>\#include</b> <vigra/regression.hxx>
972  Namespace: vigra
973 */
974 class NonlinearLSQOptions
975 {
976  public:
977 
978  double epsilon, lambda, tau;
979  int max_iter;
980 
981  /** \brief Initialize options with default values.
982  */
983  NonlinearLSQOptions()
984  : epsilon(0.0),
985  lambda(0.1),
986  tau(1.4),
987  max_iter(50)
988  {}
989 
990  /** \brief Set minimum relative improvement in residual.
991 
992  The algorithm stops when the relative improvement in residuals
993  between consecutive iterations is less than this value.
994 
995  Default: 0 (i.e. choose tolerance automatically, will be 10*epsilon of the numeric type)
996  */
997  NonlinearLSQOptions & tolerance(double eps)
998  {
999  epsilon = eps;
1000  return *this;
1001  }
1002 
1003  /** \brief Set maximum number of iterations.
1004 
1005  Default: 50
1006  */
1007  NonlinearLSQOptions & maxIterations(int iter)
1008  {
1009  max_iter = iter;
1010  return *this;
1011  }
1012 
1013  /** \brief Set damping parameters for Levenberg-Marquardt algorithm.
1014 
1015  \a lambda determines by how much the diagonal is emphasized, and \a v is
1016  the factor by which lambda will be increased if more damping is needed
1017  for convergence
1018  (see <a href="http://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm">Wikipedia</a>
1019  for more explanations).
1020 
1021  Default: lambda = 0.1, v = 1.4
1022  */
1023  NonlinearLSQOptions & dampingParamters(double lambda, double v)
1024  {
1025  vigra_precondition(lambda > 0.0 && v > 0.0,
1026  "NonlinearLSQOptions::dampingParamters(): parameters must be positive.");
1027  this->lambda = lambda;
1028  tau = v;
1029  return *this;
1030  }
1031 };
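As with the other option classes, setters can be chained; a small sketch (the values are placeholders):

\code
NonlinearLSQOptions options;
options.tolerance(1e-12)             // stop when the relative improvement falls below 1e-12
       .maxIterations(100)
       .dampingParamters(0.1, 1.4);  // initial lambda and growth factor v
\endcode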
1032 
1033 template <unsigned int D, class T, class S1, class S2,
1034  class U, int N,
1035  class Functor>
1036 T
1037 nonlinearLeastSquaresImpl(MultiArrayView<D, T, S1> const & features,
1038  MultiArrayView<1, T, S2> const & response,
1039  TinyVector<U, N> & p,
1040  Functor model,
1041  NonlinearLSQOptions const & options)
1042 {
1043  vigra_precondition(features.shape(0) == response.shape(0),
1044  "nonlinearLeastSquares(): shape mismatch between features and response.");
1045 
1046  double t = options.tau, l = options.lambda; // initial damping parameters
1047 
1048  double epsilonT = NumericTraits<T>::epsilon()*10.0,
1049  epsilonU = NumericTraits<U>::epsilon()*10.0,
1050  epsilon = options.epsilon <= 0.0
1051  ? std::max(epsilonT, epsilonU)
1052  : options.epsilon;
1053 
1054  linalg::Matrix<T> jj(N,N); // outer product of the Jacobian
1055  TinyVector<U, N> jr, dp;
1056 
1057  T residual = 0.0;
1058  bool didStep = true;
1059 
1060  for(int iter=0; iter<options.max_iter; ++iter)
1061  {
1062  if(didStep)
1063  {
1064  // update the residual and Jacobian
1065  residual = 0.0;
1066  jr = 0.0;
1067  jj = 0.0;
1068 
1069  for(int i=0; i<features.shape(0); ++i)
1070  {
1071  autodiff::DualVector<U, N> res = model(detail::getRow(features, i), autodiff::dualMatrix(p));
1072 
1073  T r = response(i) - res.v;
1074  jr += r * res.d;
1075  jj += outer(res.d);
1076  residual += sq(r);
1077  }
1078  }
1079 
1080  // perform a damped gradient step
1081  linalg::Matrix<T> djj(jj);
1082  djj.diagonal() *= 1.0 + l;
1083  linearSolve(djj, jr, dp);
1084 
1085  TinyVector<U, N> p_new = p + dp;
1086 
1087  // compute the new residual
1088  T residual_new = 0.0;
1089  for(int i=0; i<features.shape(0); ++i)
1090  {
1091  residual_new += sq(response(i) - model(detail::getRow(features, i), p_new));
1092  }
1093 
1094  if(residual_new < residual)
1095  {
1096  // accept the step
1097  p = p_new;
1098  if(std::abs((residual - residual_new) / residual) < epsilon)
1099  return residual_new;
1100  // try less damping in the next iteration
1101  l /= t;
1102  didStep = true;
1103  }
1104  else
1105  {
1106  // reject the step and use more damping in the next iteration
1107  l *= t;
1108  didStep = false;
1109  }
1110  }
1111 
1112  return residual;
1113 }
1114 
1115 /********************************************************/
1116 /* */
1117 /* nonlinearLeastSquares */
1118 /* */
1119 /********************************************************/
1120 
1121 /** \brief Fit a non-linear model to given data by minimizing least squares loss.
1122 
1123  <b> Declarations:</b>
1124 
1125  \code
1126  namespace vigra {
1127  // variant 1: optimize a univariate model ('x' is a 1D array of scalar data points)
1128  template <class T, class S1, class S2,
1129  class U, int N,
1130  class Functor>
1131  T
1132  nonlinearLeastSquares(MultiArrayView<1, T, S1> const & x,
1133  MultiArrayView<1, T, S2> const & y,
1134  TinyVector<U, N> & model_parameters,
1135  Functor model,
1136  NonlinearLSQOptions const & options = NonlinearLSQOptions());
1137 
1138  // variant 2: optimize a multivariate model ('x' is a 2D array of vector-valued data points)
1139  template <class T, class S1, class S2,
1140  class U, int N,
1141  class Functor>
1142  T
1143  nonlinearLeastSquares(MultiArrayView<2, T, S1> const & x,
1144  MultiArrayView<1, T, S2> const & y,
1145  TinyVector<U, N> & model_parameters,
1146  Functor model,
1147  NonlinearLSQOptions const & options = NonlinearLSQOptions());
1148  }
1149  \endcode
1150 
1151  This function implements the
1152  <a href="http://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm">Levenberg-Marquardt algorithm</a>
1153  to fit a non-linear model to given data. The model depends on a vector of
1154  parameters <b>p</b> which are to be chosen such that the least-squares residual
1155  between the data and the model's predictions is minimized according to the objective function:
1156 
1157  \f[ \tilde \textrm{\bf p} = \textrm{argmin}_{\textrm{\bf p}} \sum_i \left( y_i - f(\textrm{\bf x}_i; \textrm{\bf p}) \right)^2
1158  \f]
1159 
1160  where \f$f(\textrm{\bf x}; \textrm{\bf p})\f$ is the model to be optimized
1161  (with arguments \f$\textrm{\bf x}\f$ and parameters \f$\textrm{\bf p}\f$), and
1162  \f$(\textrm{\bf x}_i; y_i)\f$ are the feature/response pairs of the given data.
1163  Since the model is non-linear (otherwise, you should use ordinary \ref leastSquares()),
1164  it must be linearized in terms of a first-order Taylor expansion, and the optimal
1165  parameters <b>p</b> have to be determined iteratively. In order for the iterations to
1166  converge to the desired solution, a good initial guess on <b>p</b> is required.
1167 
1168  The model must be specified by a functor which takes one of the following forms:
1169  \code
1170  typedef double DataType; // type of your data samples, may be any numeric type
1171  static const int N = ...; // number of model parameters
1172 
1173  // variant 1: the features x are scalars
1174  struct UnivariateModel
1175  {
1176  template <class T>
1177  T operator()(DataType x, TinyVector<T, N> const & p) const { ... }
1178  };
1179 
1180  // variant 2: the features x are vectors
1181  struct MultivariateModel
1182  {
1183  template <class T>
1184  T operator()(MultiArrayView<1, DataType> const & x, TinyVector<T, N> const & p) const { ... }
1185  };
1186  \endcode
1187  Each call to the functor's <tt>operator()</tt> computes the model's prediction for a single data
1188  point. The current model parameters are specified in a TinyVector of appropriate length.
1189  The type <tt>T</tt> must be templated: normally, it is the same as <tt>DataType</tt>, but
1190  the nonlinearLeastSquares() function will temporarily replace it with a special number type
1191  that supports <a href="http://en.wikipedia.org/wiki/Automatic_differentiation">automatic differentiation</a>
1192  (see \ref vigra::autodiff::DualVector). In this way, the derivatives needed in the model's Taylor
1193  expansion can be computed automatically.
1194 
1195  When the model is univariate (has a single scalar argument), the samples must be passed to
1196  nonlinearLeastSquares() in a pair 'x', 'y' of 1D <tt>MultiArrayView</tt>s (variant 1).
1197  When the model is multivariate (has a vector-valued argument), the 'x' input must
1198  be a 2D <tt>MultiArrayView</tt> (variant 2) in which each row represents a single data sample
1199  (i.e. the number of columns corresponds to the length of the model's argument vector).
1200  The number of rows in 'x' defines the number of data samples and must match the length
1201  of array 'y'.
1202 
1203  The <tt>TinyVector</tt> 'model_parameters' holds the initial guess for the model parameters and
1204  will be overwritten by the optimal values found by the algorithm. The algorithm's internal behavior
1205  can be controlled by customizing the option object \ref vigra::NonlinearLSQOptions.
1206 
1207  The function returns the residual sum of squared errors of the final solution.
1208 
1209  <b> Usage:</b>
1210 
1211  <b>\#include</b> <vigra/regression.hxx><br>
1212  Namespace: vigra
1213 
1214  Suppose that we want to fit a centered Gaussian function of the form
1215  \f[ f(x ; a, s, b) = a \exp\left(-\frac{x^2}{2 s^2}\right) + b \f]
1216  to noisy data \f$(x_i, y_i)\f$, i.e. we want to find parameters a, s, b such that
1217  the residual \f$\sum_i \left(y_i - f(x_i; a,s,b)\right)^2\f$ is minimized.
1218  The model parameters are placed in a <tt>TinyVector<T, 3></tt> <b>p</b> according to the rules<br/>
1219  <tt>p[0] <=> a</tt>, <tt>p[1] <=> s</tt> and <tt>p[2] <=> b</tt>.<br/> The following
1220  functor computes the model's prediction for a single data point <tt>x</tt>:
1221  \code
1222  struct GaussianModel
1223  {
1224  template <class T>
1225  T operator()(double x, TinyVector<T, 3> const & p) const
1226  {
1227  return p[0] * exp(-0.5 * sq(x / p[1])) + p[2];
1228  }
1229  };
1230  \endcode
1231  Now we can find optimal values for the parameters like this:
1232  \code
1233  int size = ...; // number of data points
1234  MultiArray<1, double> x(size), y(size);
1235  ... // fill the data arrays
1236 
1237  TinyVector<double, 3> p(2.0, 1.0, 0.5); // your initial guess of the parameters
1238  // (will be overwritten with the optimal values)
1239  double residual = nonlinearLeastSquares(x, y, p, GaussianModel());
1240 
1241  std::cout << "Model parameters: a=" << p[0] << ", s=" << p[1] << ", b=" << p[2] << " (residual: " << residual << ")\n";
1242  \endcode
1243 */
1244 doxygen_overloaded_function(template <...> void nonlinearLeastSquares)
1245 
1246 template <class T, class S1, class S2,
1247  class U, int N,
1248  class Functor>
1249 inline T
1250 nonlinearLeastSquares(MultiArrayView<1, T, S1> const & features,
1251  MultiArrayView<1, T, S2> const & response,
1252  TinyVector<U, N> & p,
1253  Functor model,
1254  NonlinearLSQOptions const & options = NonlinearLSQOptions())
1255 {
1256  return nonlinearLeastSquaresImpl(features, response, p, model, options);
1257 }
1258 
1259 template <class T, class S1, class S2,
1260  class U, int N,
1261  class Functor>
1262 inline T
1263 nonlinearLeastSquares(MultiArrayView<2, T, S1> const & features,
1264  MultiArrayView<1, T, S2> const & response,
1265  TinyVector<U, N> & p,
1266  Functor model,
1267  NonlinearLSQOptions const & options = NonlinearLSQOptions())
1268 {
1269  return nonlinearLeastSquaresImpl(features, response, p, model, options);
1270 }
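The usage example above covers variant 1; as a hedged sketch of variant 2, consider a hypothetical exponential model with two-dimensional features (the model and all values are illustrative assumptions, not part of the library):

\code
// hypothetical model: f(x; p) = p[0] * exp(p[1]*x[0] + p[2]*x[1])
struct ExpModel
{
    template <class T>
    T operator()(MultiArrayView<1, double> const & x, TinyVector<T, 3> const & p) const
    {
        return p[0] * exp(p[1]*x(0) + p[2]*x(1));
    }
};

int size = 200;                            // number of data samples (placeholder)
MultiArray<2, double> x(Shape2(size, 2));  // one 2D feature vector per row
MultiArray<1, double> y(Shape1(size));
// ... fill x and y ...
TinyVector<double, 3> p(1.0, 0.1, 0.1);    // initial guess, overwritten with the fit
double residual = nonlinearLeastSquares(x, y, p, ExpModel());
\endcode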
1271 
1272 //@}
1273 
1274 } // namespace vigra
1275 
1276 #endif // VIGRA_REGRESSION_HXX