You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1124 lines
39 KiB
1124 lines
39 KiB
/*
|
|
* Library: lmfit (Levenberg-Marquardt least squares fitting)
|
|
*
|
|
* File: lmmin.c
|
|
*
|
|
* Contents: Levenberg-Marquardt minimization.
|
|
*
|
|
* Copyright: MINPACK authors, The University of Chikago (1980-1999)
|
|
* Joachim Wuttke, Forschungszentrum Juelich GmbH (2004-2013)
|
|
*
|
|
* License: see ../COPYING (FreeBSD)
|
|
*
|
|
* Homepage: apps.jcns.fz-juelich.de/lmfit
|
|
*/
|
|
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
#include <float.h>
|
|
#include "lmmin.h"
|
|
|
|
#define MIN(a, b) (((a) <= (b)) ? (a) : (b))
|
|
#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
|
|
#define SQR(x) (x) * (x)
|
|
|
|
/* Declare functions that do the heavy numerics.
|
|
Implementions are in this source file, below lmmin.
|
|
Dependences: lmmin calls lmpar, which calls qrfac and qrsolv. */
|
|
void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot,
|
|
const double* diag, const double* qtb, const double delta,
|
|
double* par, double* x, double* Sdiag, double* aux, double* xdi);
|
|
void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag,
|
|
double* Acnorm, double* W);
|
|
void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot,
|
|
const double* diag, const double* qtb, double* x,
|
|
double* Sdiag, double* W);
|
|
|
|
/******************************************************************************/
|
|
/* Numeric constants */
|
|
/******************************************************************************/
|
|
|
|
/* Set machine-dependent constants to values from float.h. */
|
|
#define LM_MACHEP DBL_EPSILON /* resolution of arithmetic */
|
|
#define LM_DWARF DBL_MIN /* smallest nonzero number */
|
|
#define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */
|
|
#define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */
|
|
#define LM_USERTOL 30 * LM_MACHEP /* users are recommended to require this */
|
|
|
|
/* If the above values do not work, the following seem good for an x86:
|
|
LM_MACHEP .555e-16
|
|
LM_DWARF 9.9e-324
|
|
LM_SQRT_DWARF 1.e-160
|
|
LM_SQRT_GIANT 1.e150
|
|
LM_USER_TOL 1.e-14
|
|
The following values should work on any machine:
|
|
LM_MACHEP 1.2e-16
|
|
LM_DWARF 1.0e-38
|
|
LM_SQRT_DWARF 3.834e-20
|
|
LM_SQRT_GIANT 1.304e19
|
|
LM_USER_TOL 1.e-14
|
|
*/
|
|
|
|
/* Predefined control parameter sets (msgfile=NULL means stdout). */
|
|
const lm_control_struct lm_control_double = {
|
|
LM_USERTOL, LM_USERTOL, LM_USERTOL, LM_USERTOL,
|
|
100., 100, 1, NULL, 0, -1, -1};
|
|
const lm_control_struct lm_control_float = {
|
|
1.e-7, 1.e-7, 1.e-7, 1.e-7,
|
|
100., 100, 1, NULL, 0, -1, -1};
|
|
|
|
/******************************************************************************/
|
|
/* Message texts (indexed by status.info) */
|
|
/******************************************************************************/
|
|
|
|
const char* lm_infmsg[] = {
|
|
"found zero (sum of squares below underflow limit)",
|
|
"converged (the relative error in the sum of squares is at most tol)",
|
|
"converged (the relative error of the parameter vector is at most tol)",
|
|
"converged (both errors are at most tol)",
|
|
"trapped (by degeneracy; increasing epsilon might help)",
|
|
"exhausted (number of function calls exceeding preset patience)",
|
|
"failed (ftol<tol: cannot reduce sum of squares any further)",
|
|
"failed (xtol<tol: cannot improve approximate solution any further)",
|
|
"failed (gtol<tol: cannot improve approximate solution any further)",
|
|
"crashed (not enough memory)",
|
|
"exploded (fatal coding error: improper input parameters)",
|
|
"stopped (break requested within function evaluation)",
|
|
"found nan (function value is not-a-number or infinite)"};
|
|
|
|
const char* lm_shortmsg[] = {
|
|
"found zero",
|
|
"converged (f)",
|
|
"converged (p)",
|
|
"converged (2)",
|
|
"degenerate",
|
|
"call limit",
|
|
"failed (f)",
|
|
"failed (p)",
|
|
"failed (o)",
|
|
"no memory",
|
|
"invalid input",
|
|
"user break",
|
|
"found nan"};
|
|
|
|
/******************************************************************************/
|
|
/* Monitoring auxiliaries. */
|
|
/******************************************************************************/
|
|
|
|
void lm_print_pars(int nout, const double* par, FILE* fout)
|
|
{
|
|
int i;
|
|
for (i = 0; i < nout; ++i)
|
|
fprintf(fout, " %16.9g", par[i]);
|
|
fprintf(fout, "\n");
|
|
}
|
|
|
|
/******************************************************************************/
|
|
/* lmmin (main minimization routine) */
|
|
/******************************************************************************/
|
|
|
|
void lmmin(const int n, double* x, const int m, const void* data,
|
|
void (*evaluate)(const double* par, const int m_dat,
|
|
const void* data, double* fvec, int* userbreak),
|
|
const lm_control_struct* C, lm_status_struct* S)
|
|
{
|
|
int j, i;
|
|
double actred, dirder, fnorm, fnorm1, gnorm, pnorm, prered, ratio, step,
|
|
sum, temp, temp1, temp2, temp3;
|
|
|
|
/*** Initialize internal variables. ***/
|
|
|
|
int maxfev = C->patience * (n+1);
|
|
|
|
int inner_success; /* flag for loop control */
|
|
double lmpar = 0; /* Levenberg-Marquardt parameter */
|
|
double delta = 0;
|
|
double xnorm = 0;
|
|
double eps = sqrt(MAX(C->epsilon, LM_MACHEP)); /* for forward differences */
|
|
|
|
int nout = C->n_maxpri == -1 ? n : MIN(C->n_maxpri, n);
|
|
|
|
/* Reinterpret C->msgfile=NULL as stdout (which is unavailable for
|
|
compile-time initialization of lm_control_double and similar). */
|
|
FILE* msgfile = C->msgfile ? C->msgfile : stdout;
|
|
|
|
/*** Default status info; must be set before first return statement. ***/
|
|
|
|
S->outcome = 0; /* status code */
|
|
S->userbreak = 0;
|
|
S->nfev = 0; /* function evaluation counter */
|
|
|
|
/*** Check input parameters for errors. ***/
|
|
|
|
if (n <= 0) {
|
|
fprintf(stderr, "lmmin: invalid number of parameters %i\n", n);
|
|
S->outcome = 10;
|
|
return;
|
|
}
|
|
if (m < n) {
|
|
fprintf(stderr, "lmmin: number of data points (%i) "
|
|
"smaller than number of parameters (%i)\n",
|
|
m, n);
|
|
S->outcome = 10;
|
|
return;
|
|
}
|
|
if (C->ftol < 0 || C->xtol < 0 || C->gtol < 0) {
|
|
fprintf(stderr,
|
|
"lmmin: negative tolerance (at least one of %g %g %g)\n",
|
|
C->ftol, C->xtol, C->gtol);
|
|
S->outcome = 10;
|
|
return;
|
|
}
|
|
if (maxfev <= 0) {
|
|
fprintf(stderr, "lmmin: nonpositive function evaluations limit %i\n",
|
|
maxfev);
|
|
S->outcome = 10;
|
|
return;
|
|
}
|
|
if (C->stepbound <= 0) {
|
|
fprintf(stderr, "lmmin: nonpositive stepbound %g\n", C->stepbound);
|
|
S->outcome = 10;
|
|
return;
|
|
}
|
|
if (C->scale_diag != 0 && C->scale_diag != 1) {
|
|
fprintf(stderr, "lmmin: logical variable scale_diag=%i, "
|
|
"should be 0 or 1\n",
|
|
C->scale_diag);
|
|
S->outcome = 10;
|
|
return;
|
|
}
|
|
|
|
/*** Allocate work space. ***/
|
|
|
|
/* Allocate total workspace with just one system call */
|
|
char* ws;
|
|
if ((ws = malloc((2*m + 5*n + m*n) * sizeof(double) +
|
|
n * sizeof(int))) == NULL) {
|
|
S->outcome = 9;
|
|
return;
|
|
}
|
|
|
|
/* Assign workspace segments. */
|
|
char* pws = ws;
|
|
double* fvec = (double*)pws;
|
|
pws += m * sizeof(double) / sizeof(char);
|
|
double* diag = (double*)pws;
|
|
pws += n * sizeof(double) / sizeof(char);
|
|
double* qtf = (double*)pws;
|
|
pws += n * sizeof(double) / sizeof(char);
|
|
double* fjac = (double*)pws;
|
|
pws += n * m * sizeof(double) / sizeof(char);
|
|
double* wa1 = (double*)pws;
|
|
pws += n * sizeof(double) / sizeof(char);
|
|
double* wa2 = (double*)pws;
|
|
pws += n * sizeof(double) / sizeof(char);
|
|
double* wa3 = (double*)pws;
|
|
pws += n * sizeof(double) / sizeof(char);
|
|
double* wf = (double*)pws;
|
|
pws += m * sizeof(double) / sizeof(char);
|
|
int* Pivot = (int*)pws;
|
|
pws += n * sizeof(int) / sizeof(char);
|
|
|
|
/* Initialize diag. */
|
|
if (!C->scale_diag)
|
|
for (j = 0; j < n; j++)
|
|
diag[j] = 1;
|
|
|
|
/*** Evaluate function at starting point and calculate norm. ***/
|
|
|
|
if (C->verbosity) {
|
|
fprintf(msgfile, "lmmin start ");
|
|
lm_print_pars(nout, x, msgfile);
|
|
}
|
|
(*evaluate)(x, m, data, fvec, &(S->userbreak));
|
|
if (C->verbosity > 4)
|
|
for (i = 0; i < m; ++i)
|
|
fprintf(msgfile, " fvec[%4i] = %18.8g\n", i, fvec[i]);
|
|
S->nfev = 1;
|
|
if (S->userbreak)
|
|
goto terminate;
|
|
fnorm = lm_enorm(m, fvec);
|
|
if (C->verbosity)
|
|
fprintf(msgfile, " fnorm = %18.8g\n", fnorm);
|
|
|
|
if (!isfinite(fnorm)) {
|
|
S->outcome = 12; /* nan */
|
|
goto terminate;
|
|
} else if (fnorm <= LM_DWARF) {
|
|
S->outcome = 0; /* sum of squares almost zero, nothing to do */
|
|
goto terminate;
|
|
}
|
|
|
|
/*** The outer loop: compute gradient, then descend. ***/
|
|
|
|
for (int outer = 0;; ++outer) {
|
|
|
|
/** Calculate the Jacobian. **/
|
|
for (j = 0; j < n; j++) {
|
|
temp = x[j];
|
|
step = MAX(eps * eps, eps * fabs(temp));
|
|
x[j] += step; /* replace temporarily */
|
|
(*evaluate)(x, m, data, wf, &(S->userbreak));
|
|
++(S->nfev);
|
|
if (S->userbreak)
|
|
goto terminate;
|
|
for (i = 0; i < m; i++)
|
|
fjac[j*m+i] = (wf[i] - fvec[i]) / step;
|
|
x[j] = temp; /* restore */
|
|
}
|
|
if (C->verbosity >= 10) {
|
|
/* print the entire matrix */
|
|
printf("\nlmmin Jacobian\n");
|
|
for (i = 0; i < m; i++) {
|
|
printf(" ");
|
|
for (j = 0; j < n; j++)
|
|
printf("%.5e ", fjac[j*m+i]);
|
|
printf("\n");
|
|
}
|
|
}
|
|
|
|
/** Compute the QR factorization of the Jacobian. **/
|
|
|
|
/* fjac is an m by n array. The upper n by n submatrix of fjac is made
|
|
* to contain an upper triangular matrix R with diagonal elements of
|
|
* nonincreasing magnitude such that
|
|
*
|
|
* P^T*(J^T*J)*P = R^T*R
|
|
*
|
|
* (NOTE: ^T stands for matrix transposition),
|
|
*
|
|
* where P is a permutation matrix and J is the final calculated
|
|
* Jacobian. Column j of P is column Pivot(j) of the identity matrix.
|
|
* The lower trapezoidal part of fjac contains information generated
|
|
* during the computation of R.
|
|
*
|
|
* Pivot is an integer array of length n. It defines a permutation
|
|
* matrix P such that jac*P = Q*R, where jac is the final calculated
|
|
* Jacobian, Q is orthogonal (not stored), and R is upper triangular
|
|
* with diagonal elements of nonincreasing magnitude. Column j of P
|
|
* is column Pivot(j) of the identity matrix.
|
|
*/
|
|
|
|
lm_qrfac(m, n, fjac, Pivot, wa1, wa2, wa3);
|
|
/* return values are Pivot, wa1=rdiag, wa2=acnorm */
|
|
|
|
/** Form Q^T * fvec, and store first n components in qtf. **/
|
|
for (i = 0; i < m; i++)
|
|
wf[i] = fvec[i];
|
|
|
|
for (j = 0; j < n; j++) {
|
|
temp3 = fjac[j*m+j];
|
|
if (temp3 != 0) {
|
|
sum = 0;
|
|
for (i = j; i < m; i++)
|
|
sum += fjac[j*m+i] * wf[i];
|
|
temp = -sum / temp3;
|
|
for (i = j; i < m; i++)
|
|
wf[i] += fjac[j*m+i] * temp;
|
|
}
|
|
fjac[j*m+j] = wa1[j];
|
|
qtf[j] = wf[j];
|
|
}
|
|
|
|
/** Compute norm of scaled gradient and detect degeneracy. **/
|
|
gnorm = 0;
|
|
for (j = 0; j < n; j++) {
|
|
if (wa2[Pivot[j]] == 0)
|
|
continue;
|
|
sum = 0;
|
|
for (i = 0; i <= j; i++)
|
|
sum += fjac[j*m+i] * qtf[i];
|
|
gnorm = MAX(gnorm, fabs(sum / wa2[Pivot[j]] / fnorm));
|
|
}
|
|
|
|
if (gnorm <= C->gtol) {
|
|
S->outcome = 4;
|
|
goto terminate;
|
|
}
|
|
|
|
/** Initialize or update diag and delta. **/
|
|
if (!outer) { /* first iteration only */
|
|
if (C->scale_diag) {
|
|
/* diag := norms of the columns of the initial Jacobian */
|
|
for (j = 0; j < n; j++)
|
|
diag[j] = wa2[j] ? wa2[j] : 1;
|
|
/* xnorm := || D x || */
|
|
for (j = 0; j < n; j++)
|
|
wa3[j] = diag[j] * x[j];
|
|
xnorm = lm_enorm(n, wa3);
|
|
if (C->verbosity >= 2) {
|
|
fprintf(msgfile, "lmmin diag ");
|
|
lm_print_pars(nout, x, msgfile); // xnorm
|
|
fprintf(msgfile, " xnorm = %18.8g\n", xnorm);
|
|
}
|
|
/* Only now print the header for the loop table. */
|
|
if (C->verbosity >= 3) {
|
|
fprintf(msgfile, " o i lmpar prered"
|
|
" ratio dirder delta"
|
|
" pnorm fnorm");
|
|
for (i = 0; i < nout; ++i)
|
|
fprintf(msgfile, " p%i", i);
|
|
fprintf(msgfile, "\n");
|
|
}
|
|
} else {
|
|
xnorm = lm_enorm(n, x);
|
|
}
|
|
if (!isfinite(xnorm)) {
|
|
S->outcome = 12; /* nan */
|
|
goto terminate;
|
|
}
|
|
/* Initialize the step bound delta. */
|
|
if (xnorm)
|
|
delta = C->stepbound * xnorm;
|
|
else
|
|
delta = C->stepbound;
|
|
} else {
|
|
if (C->scale_diag) {
|
|
for (j = 0; j < n; j++)
|
|
diag[j] = MAX(diag[j], wa2[j]);
|
|
}
|
|
}
|
|
|
|
/** The inner loop. **/
|
|
int inner = 0;
|
|
do {
|
|
|
|
/** Determine the Levenberg-Marquardt parameter. **/
|
|
lm_lmpar(n, fjac, m, Pivot, diag, qtf, delta, &lmpar,
|
|
wa1, wa2, wf, wa3);
|
|
/* used return values are fjac (partly), lmpar, wa1=x, wa3=diag*x */
|
|
|
|
/* Predict scaled reduction. */
|
|
pnorm = lm_enorm(n, wa3);
|
|
if (!isfinite(pnorm)) {
|
|
S->outcome = 12; /* nan */
|
|
goto terminate;
|
|
}
|
|
temp2 = lmpar * SQR(pnorm / fnorm);
|
|
for (j = 0; j < n; j++) {
|
|
wa3[j] = 0;
|
|
for (i = 0; i <= j; i++)
|
|
wa3[i] -= fjac[j*m+i] * wa1[Pivot[j]];
|
|
}
|
|
temp1 = SQR(lm_enorm(n, wa3) / fnorm);
|
|
if (!isfinite(temp1)) {
|
|
S->outcome = 12; /* nan */
|
|
goto terminate;
|
|
}
|
|
prered = temp1 + 2*temp2;
|
|
dirder = -temp1 + temp2; /* scaled directional derivative */
|
|
|
|
/* At first call, adjust the initial step bound. */
|
|
if (!outer && pnorm < delta)
|
|
delta = pnorm;
|
|
|
|
/** Evaluate the function at x + p. **/
|
|
for (j = 0; j < n; j++)
|
|
wa2[j] = x[j] - wa1[j];
|
|
(*evaluate)(wa2, m, data, wf, &(S->userbreak));
|
|
++(S->nfev);
|
|
if (S->userbreak)
|
|
goto terminate;
|
|
fnorm1 = lm_enorm(m, wf);
|
|
if (!isfinite(fnorm1)) {
|
|
S->outcome = 12; /* nan */
|
|
goto terminate;
|
|
}
|
|
|
|
/** Evaluate the scaled reduction. **/
|
|
|
|
/* Actual scaled reduction. */
|
|
actred = 1 - SQR(fnorm1 / fnorm);
|
|
|
|
/* Ratio of actual to predicted reduction. */
|
|
ratio = prered ? actred / prered : 0;
|
|
|
|
if (C->verbosity == 2) {
|
|
fprintf(msgfile, "lmmin (%i:%i) ", outer, inner);
|
|
lm_print_pars(nout, wa2, msgfile); // fnorm1,
|
|
} else if (C->verbosity >= 3) {
|
|
printf("%3i %2i %9.2g %9.2g %14.6g"
|
|
" %9.2g %10.3e %10.3e %21.15e",
|
|
outer, inner, lmpar, prered, ratio,
|
|
dirder, delta, pnorm, fnorm1);
|
|
for (i = 0; i < nout; ++i)
|
|
fprintf(msgfile, " %16.9g", wa2[i]);
|
|
fprintf(msgfile, "\n");
|
|
}
|
|
|
|
/* Update the step bound. */
|
|
if (ratio <= 0.25) {
|
|
if (actred >= 0)
|
|
temp = 0.5;
|
|
else if (actred > -99) /* -99 = 1-1/0.1^2 */
|
|
temp = MAX(dirder / (2*dirder + actred), 0.1);
|
|
else
|
|
temp = 0.1;
|
|
delta = temp * MIN(delta, pnorm / 0.1);
|
|
lmpar /= temp;
|
|
} else if (ratio >= 0.75) {
|
|
delta = 2 * pnorm;
|
|
lmpar *= 0.5;
|
|
} else if (!lmpar) {
|
|
delta = 2 * pnorm;
|
|
}
|
|
|
|
/** On success, update solution, and test for convergence. **/
|
|
|
|
inner_success = ratio >= 1e-4;
|
|
if (inner_success) {
|
|
|
|
/* Update x, fvec, and their norms. */
|
|
if (C->scale_diag) {
|
|
for (j = 0; j < n; j++) {
|
|
x[j] = wa2[j];
|
|
wa2[j] = diag[j] * x[j];
|
|
}
|
|
} else {
|
|
for (j = 0; j < n; j++)
|
|
x[j] = wa2[j];
|
|
}
|
|
for (i = 0; i < m; i++)
|
|
fvec[i] = wf[i];
|
|
xnorm = lm_enorm(n, wa2);
|
|
if (!isfinite(xnorm)) {
|
|
S->outcome = 12; /* nan */
|
|
goto terminate;
|
|
}
|
|
fnorm = fnorm1;
|
|
}
|
|
|
|
/* Convergence tests. */
|
|
S->outcome = 0;
|
|
if (fnorm <= LM_DWARF)
|
|
goto terminate; /* success: sum of squares almost zero */
|
|
/* Test two criteria (both may be fulfilled). */
|
|
if (fabs(actred) <= C->ftol && prered <= C->ftol && ratio <= 2)
|
|
S->outcome = 1; /* success: x almost stable */
|
|
if (delta <= C->xtol * xnorm)
|
|
S->outcome += 2; /* success: sum of squares almost stable */
|
|
if (S->outcome != 0) {
|
|
goto terminate;
|
|
}
|
|
|
|
/** Tests for termination and stringent tolerances. **/
|
|
if (S->nfev >= maxfev) {
|
|
S->outcome = 5;
|
|
goto terminate;
|
|
}
|
|
if (fabs(actred) <= LM_MACHEP && prered <= LM_MACHEP &&
|
|
ratio <= 2) {
|
|
S->outcome = 6;
|
|
goto terminate;
|
|
}
|
|
if (delta <= LM_MACHEP * xnorm) {
|
|
S->outcome = 7;
|
|
goto terminate;
|
|
}
|
|
if (gnorm <= LM_MACHEP) {
|
|
S->outcome = 8;
|
|
goto terminate;
|
|
}
|
|
|
|
/** End of the inner loop. Repeat if iteration unsuccessful. **/
|
|
++inner;
|
|
} while (!inner_success);
|
|
|
|
}; /*** End of the outer loop. ***/
|
|
|
|
terminate:
|
|
S->fnorm = lm_enorm(m, fvec);
|
|
if (C->verbosity >= 2)
|
|
printf("lmmin outcome (%i) xnorm %g ftol %g xtol %g\n", S->outcome,
|
|
xnorm, C->ftol, C->xtol);
|
|
if (C->verbosity & 1) {
|
|
fprintf(msgfile, "lmmin final ");
|
|
lm_print_pars(nout, x, msgfile); // S->fnorm,
|
|
fprintf(msgfile, " fnorm = %18.8g\n", S->fnorm);
|
|
}
|
|
if (S->userbreak) /* user-requested break */
|
|
S->outcome = 11;
|
|
|
|
/*** Deallocate the workspace. ***/
|
|
free(ws);
|
|
|
|
} /*** lmmin. ***/
|
|
|
|
/******************************************************************************/
|
|
/* lm_lmpar (determine Levenberg-Marquardt parameter) */
|
|
/******************************************************************************/
|
|
|
|
void lm_lmpar(const int n, double* r, const int ldr, const int* Pivot,
|
|
const double* diag, const double* qtb, const double delta,
|
|
double* par, double* x, double* Sdiag, double* aux, double* xdi)
|
|
/* Given an m by n matrix A, an n by n nonsingular diagonal matrix D,
|
|
* an m-vector b, and a positive number delta, the problem is to
|
|
* determine a parameter value par such that if x solves the system
|
|
*
|
|
* A*x = b and sqrt(par)*D*x = 0
|
|
*
|
|
* in the least squares sense, and dxnorm is the Euclidean norm of D*x,
|
|
* then either par=0 and (dxnorm-delta) < 0.1*delta, or par>0 and
|
|
* abs(dxnorm-delta) < 0.1*delta.
|
|
*
|
|
* Using lm_qrsolv, this subroutine completes the solution of the
|
|
* problem if it is provided with the necessary information from the
|
|
* QR factorization, with column pivoting, of A. That is, if A*P = Q*R,
|
|
* where P is a permutation matrix, Q has orthogonal columns, and R is
|
|
* an upper triangular matrix with diagonal elements of nonincreasing
|
|
* magnitude, then lmpar expects the full upper triangle of R, the
|
|
* permutation matrix P, and the first n components of Q^T*b. On output
|
|
* lmpar also provides an upper triangular matrix S such that
|
|
*
|
|
* P^T*(A^T*A + par*D*D)*P = S^T*S.
|
|
*
|
|
* S is employed within lmpar and may be of separate interest.
|
|
*
|
|
* Only a few iterations are generally needed for convergence of the
|
|
* algorithm. If, however, the limit of 10 iterations is reached, then
|
|
* the output par will contain the best value obtained so far.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* n is a positive integer INPUT variable set to the order of r.
|
|
*
|
|
* r is an n by n array. On INPUT the full upper triangle must contain
|
|
* the full upper triangle of the matrix R. On OUTPUT the full upper
|
|
* triangle is unaltered, and the strict lower triangle contains the
|
|
* strict upper triangle (transposed) of the upper triangular matrix S.
|
|
*
|
|
* ldr is a positive integer INPUT variable not less than n which
|
|
* specifies the leading dimension of the array R.
|
|
*
|
|
* Pivot is an integer INPUT array of length n which defines the
|
|
* permutation matrix P such that A*P = Q*R. Column j of P is column
|
|
* Pivot(j) of the identity matrix.
|
|
*
|
|
* diag is an INPUT array of length n which must contain the diagonal
|
|
* elements of the matrix D.
|
|
*
|
|
* qtb is an INPUT array of length n which must contain the first
|
|
* n elements of the vector Q^T*b.
|
|
*
|
|
* delta is a positive INPUT variable which specifies an upper bound
|
|
* on the Euclidean norm of D*x.
|
|
*
|
|
* par is a nonnegative variable. On INPUT par contains an initial
|
|
* estimate of the Levenberg-Marquardt parameter. On OUTPUT par
|
|
* contains the final estimate.
|
|
*
|
|
* x is an OUTPUT array of length n which contains the least-squares
|
|
* solution of the system A*x = b, sqrt(par)*D*x = 0, for the output par.
|
|
*
|
|
* Sdiag is an array of length n needed as workspace; on OUTPUT it
|
|
* contains the diagonal elements of the upper triangular matrix S.
|
|
*
|
|
* aux is a multi-purpose work array of length n.
|
|
*
|
|
* xdi is a work array of length n. On OUTPUT: diag[j] * x[j].
|
|
*
|
|
*/
|
|
{
|
|
int i, iter, j, nsing;
|
|
double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
|
|
double sum, temp;
|
|
static double p1 = 0.1;
|
|
|
|
/*** Compute and store in x the Gauss-Newton direction. If the Jacobian
|
|
is rank-deficient, obtain a least-squares solution. ***/
|
|
|
|
nsing = n;
|
|
for (j = 0; j < n; j++) {
|
|
aux[j] = qtb[j];
|
|
if (r[j*ldr+j] == 0 && nsing == n)
|
|
nsing = j;
|
|
if (nsing < n)
|
|
aux[j] = 0;
|
|
}
|
|
for (j = nsing-1; j >= 0; j--) {
|
|
aux[j] = aux[j] / r[j+ldr*j];
|
|
temp = aux[j];
|
|
for (i = 0; i < j; i++)
|
|
aux[i] -= r[j*ldr+i] * temp;
|
|
}
|
|
|
|
for (j = 0; j < n; j++)
|
|
x[Pivot[j]] = aux[j];
|
|
|
|
/*** Initialize the iteration counter, evaluate the function at the origin,
|
|
and test for acceptance of the Gauss-Newton direction. ***/
|
|
|
|
for (j = 0; j < n; j++)
|
|
xdi[j] = diag[j] * x[j];
|
|
dxnorm = lm_enorm(n, xdi);
|
|
fp = dxnorm - delta;
|
|
if (fp <= p1 * delta) {
|
|
#ifdef LMFIT_DEBUG_MESSAGES
|
|
printf("debug lmpar nsing=%d, n=%d, terminate[fp<=p1*del]\n", nsing, n);
|
|
#endif
|
|
*par = 0;
|
|
return;
|
|
}
|
|
|
|
/*** If the Jacobian is not rank deficient, the Newton step provides a
|
|
lower bound, parl, for the zero of the function. Otherwise set this
|
|
bound to zero. ***/
|
|
|
|
parl = 0;
|
|
if (nsing >= n) {
|
|
for (j = 0; j < n; j++)
|
|
aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm;
|
|
|
|
for (j = 0; j < n; j++) {
|
|
sum = 0;
|
|
for (i = 0; i < j; i++)
|
|
sum += r[j*ldr+i] * aux[i];
|
|
aux[j] = (aux[j] - sum) / r[j+ldr*j];
|
|
}
|
|
temp = lm_enorm(n, aux);
|
|
parl = fp / delta / temp / temp;
|
|
}
|
|
|
|
/*** Calculate an upper bound, paru, for the zero of the function. ***/
|
|
|
|
for (j = 0; j < n; j++) {
|
|
sum = 0;
|
|
for (i = 0; i <= j; i++)
|
|
sum += r[j*ldr+i] * qtb[i];
|
|
aux[j] = sum / diag[Pivot[j]];
|
|
}
|
|
gnorm = lm_enorm(n, aux);
|
|
paru = gnorm / delta;
|
|
if (paru == 0)
|
|
paru = LM_DWARF / MIN(delta, p1);
|
|
|
|
/*** If the input par lies outside of the interval (parl,paru),
|
|
set par to the closer endpoint. ***/
|
|
|
|
*par = MAX(*par, parl);
|
|
*par = MIN(*par, paru);
|
|
if (*par == 0)
|
|
*par = gnorm / dxnorm;
|
|
|
|
/*** Iterate. ***/
|
|
|
|
for (iter = 0;; iter++) {
|
|
|
|
/** Evaluate the function at the current value of par. **/
|
|
if (*par == 0)
|
|
*par = MAX(LM_DWARF, 0.001 * paru);
|
|
temp = sqrt(*par);
|
|
for (j = 0; j < n; j++)
|
|
aux[j] = temp * diag[j];
|
|
|
|
lm_qrsolv(n, r, ldr, Pivot, aux, qtb, x, Sdiag, xdi);
|
|
/* return values are r, x, Sdiag */
|
|
|
|
for (j = 0; j < n; j++)
|
|
xdi[j] = diag[j] * x[j]; /* used as output */
|
|
dxnorm = lm_enorm(n, xdi);
|
|
fp_old = fp;
|
|
fp = dxnorm - delta;
|
|
|
|
/** If the function is small enough, accept the current value
|
|
of par. Also test for the exceptional cases where parl
|
|
is zero or the number of iterations has reached 10. **/
|
|
if (fabs(fp) <= p1 * delta ||
|
|
(parl == 0 && fp <= fp_old && fp_old < 0) || iter == 10) {
|
|
#ifdef LMFIT_DEBUG_MESSAGES
|
|
printf("debug lmpar nsing=%d, iter=%d, "
|
|
"par=%.4e [%.4e %.4e], delta=%.4e, fp=%.4e\n",
|
|
nsing, iter, *par, parl, paru, delta, fp);
|
|
#endif
|
|
break; /* the only exit from the iteration. */
|
|
}
|
|
|
|
/** Compute the Newton correction. **/
|
|
for (j = 0; j < n; j++)
|
|
aux[j] = diag[Pivot[j]] * xdi[Pivot[j]] / dxnorm;
|
|
|
|
for (j = 0; j < n; j++) {
|
|
aux[j] = aux[j] / Sdiag[j];
|
|
for (i = j+1; i < n; i++)
|
|
aux[i] -= r[j*ldr+i] * aux[j];
|
|
}
|
|
temp = lm_enorm(n, aux);
|
|
parc = fp / delta / temp / temp;
|
|
|
|
/** Depending on the sign of the function, update parl or paru. **/
|
|
if (fp > 0)
|
|
parl = MAX(parl, *par);
|
|
else /* fp < 0 [the case fp==0 is precluded by the break condition] */
|
|
paru = MIN(paru, *par);
|
|
|
|
/** Compute an improved estimate for par. **/
|
|
*par = MAX(parl, *par + parc);
|
|
}
|
|
|
|
} /*** lm_lmpar. ***/
|
|
|
|
/******************************************************************************/
|
|
/* lm_qrfac (QR factorization, from lapack) */
|
|
/******************************************************************************/
|
|
|
|
void lm_qrfac(const int m, const int n, double* A, int* Pivot, double* Rdiag,
|
|
double* Acnorm, double* W)
|
|
/*
|
|
* This subroutine uses Householder transformations with column pivoting
|
|
* to compute a QR factorization of the m by n matrix A. That is, qrfac
|
|
* determines an orthogonal matrix Q, a permutation matrix P, and an
|
|
* upper trapezoidal matrix R with diagonal elements of nonincreasing
|
|
* magnitude, such that A*P = Q*R. The Householder transformation for
|
|
* column k, k = 1,2,...,n, is of the form
|
|
*
|
|
* I - 2*w*wT/|w|^2
|
|
*
|
|
* where w has zeroes in the first k-1 positions.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* m is an INPUT parameter set to the number of rows of A.
|
|
*
|
|
* n is an INPUT parameter set to the number of columns of A.
|
|
*
|
|
* A is an m by n array. On INPUT, A contains the matrix for which the
|
|
* QR factorization is to be computed. On OUTPUT the strict upper
|
|
* trapezoidal part of A contains the strict upper trapezoidal part
|
|
* of R, and the lower trapezoidal part of A contains a factored form
|
|
* of Q (the non-trivial elements of the vectors w described above).
|
|
*
|
|
* Pivot is an integer OUTPUT array of length n that describes the
|
|
* permutation matrix P. Column j of P is column Pivot(j) of the
|
|
* identity matrix.
|
|
*
|
|
* Rdiag is an OUTPUT array of length n which contains the diagonal
|
|
* elements of R.
|
|
*
|
|
* Acnorm is an OUTPUT array of length n which contains the norms of
|
|
* the corresponding columns of the input matrix A. If this information
|
|
* is not needed, then Acnorm can share storage with Rdiag.
|
|
*
|
|
* W is a work array of length n.
|
|
*
|
|
*/
|
|
{
|
|
int i, j, k, kmax;
|
|
double ajnorm, sum, temp;
|
|
|
|
#ifdef LMFIT_DEBUG_MESSAGES
|
|
printf("debug qrfac\n");
|
|
#endif
|
|
|
|
/** Compute initial column norms;
|
|
initialize Pivot with identity permutation. ***/
|
|
for (j = 0; j < n; j++) {
|
|
W[j] = Rdiag[j] = Acnorm[j] = lm_enorm(m, &A[j*m]);
|
|
Pivot[j] = j;
|
|
}
|
|
|
|
/** Loop over columns of A. **/
|
|
assert(n <= m);
|
|
for (j = 0; j < n; j++) {
|
|
|
|
/** Bring the column of largest norm into the pivot position. **/
|
|
kmax = j;
|
|
for (k = j+1; k < n; k++)
|
|
if (Rdiag[k] > Rdiag[kmax])
|
|
kmax = k;
|
|
|
|
if (kmax != j) {
|
|
/* Swap columns j and kmax. */
|
|
k = Pivot[j];
|
|
Pivot[j] = Pivot[kmax];
|
|
Pivot[kmax] = k;
|
|
for (i = 0; i < m; i++) {
|
|
temp = A[j*m+i];
|
|
A[j*m+i] = A[kmax*m+i];
|
|
A[kmax*m+i] = temp;
|
|
}
|
|
/* Half-swap: Rdiag[j], W[j] won't be needed any further. */
|
|
Rdiag[kmax] = Rdiag[j];
|
|
W[kmax] = W[j];
|
|
}
|
|
|
|
/** Compute the Householder reflection vector w_j to reduce the
|
|
j-th column of A to a multiple of the j-th unit vector. **/
|
|
ajnorm = lm_enorm(m-j, &A[j*m+j]);
|
|
if (ajnorm == 0) {
|
|
Rdiag[j] = 0;
|
|
continue;
|
|
}
|
|
|
|
/* Let the partial column vector A[j][j:] contain w_j := e_j+-a_j/|a_j|,
|
|
where the sign +- is chosen to avoid cancellation in w_jj. */
|
|
if (A[j*m+j] < 0)
|
|
ajnorm = -ajnorm;
|
|
for (i = j; i < m; i++)
|
|
A[j*m+i] /= ajnorm;
|
|
A[j*m+j] += 1;
|
|
|
|
/** Apply the Householder transformation U_w := 1 - 2*w_j.w_j/|w_j|^2
|
|
to the remaining columns, and update the norms. **/
|
|
for (k = j+1; k < n; k++) {
|
|
/* Compute scalar product w_j * a_j. */
|
|
sum = 0;
|
|
for (i = j; i < m; i++)
|
|
sum += A[j*m+i] * A[k*m+i];
|
|
|
|
/* Normalization is simplified by the coincidence |w_j|^2=2w_jj. */
|
|
temp = sum / A[j*m+j];
|
|
|
|
/* Carry out transform U_w_j * a_k. */
|
|
for (i = j; i < m; i++)
|
|
A[k*m+i] -= temp * A[j*m+i];
|
|
|
|
/* No idea what happens here. */
|
|
if (Rdiag[k] != 0) {
|
|
temp = A[m*k+j] / Rdiag[k];
|
|
if (fabs(temp) < 1) {
|
|
Rdiag[k] *= sqrt(1 - SQR(temp));
|
|
temp = Rdiag[k] / W[k];
|
|
} else
|
|
temp = 0;
|
|
if (temp == 0 || 0.05 * SQR(temp) <= LM_MACHEP) {
|
|
Rdiag[k] = lm_enorm(m-j-1, &A[m*k+j+1]);
|
|
W[k] = Rdiag[k];
|
|
}
|
|
}
|
|
}
|
|
|
|
Rdiag[j] = -ajnorm;
|
|
}
|
|
} /*** lm_qrfac. ***/
|
|
|
|
/******************************************************************************/
|
|
/* lm_qrsolv (linear least-squares) */
|
|
/******************************************************************************/
|
|
|
|
void lm_qrsolv(const int n, double* r, const int ldr, const int* Pivot,
|
|
const double* diag, const double* qtb, double* x,
|
|
double* Sdiag, double* W)
|
|
/*
|
|
* Given an m by n matrix A, an n by n diagonal matrix D, and an
|
|
* m-vector b, the problem is to determine an x which solves the
|
|
* system
|
|
*
|
|
* A*x = b and D*x = 0
|
|
*
|
|
* in the least squares sense.
|
|
*
|
|
* This subroutine completes the solution of the problem if it is
|
|
* provided with the necessary information from the QR factorization,
|
|
* with column pivoting, of A. That is, if A*P = Q*R, where P is a
|
|
* permutation matrix, Q has orthogonal columns, and R is an upper
|
|
* triangular matrix with diagonal elements of nonincreasing magnitude,
|
|
* then qrsolv expects the full upper triangle of R, the permutation
|
|
* matrix P, and the first n components of Q^T*b. The system
|
|
* A*x = b, D*x = 0, is then equivalent to
|
|
*
|
|
* R*z = Q^T*b, P^T*D*P*z = 0,
|
|
*
|
|
* where x = P*z. If this system does not have full rank, then a least
|
|
* squares solution is obtained. On output qrsolv also provides an upper
|
|
* triangular matrix S such that
|
|
*
|
|
* P^T*(A^T*A + D*D)*P = S^T*S.
|
|
*
|
|
* S is computed within qrsolv and may be of separate interest.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* n is a positive integer INPUT variable set to the order of R.
|
|
*
|
|
* r is an n by n array. On INPUT the full upper triangle must contain
|
|
* the full upper triangle of the matrix R. On OUTPUT the full upper
|
|
* triangle is unaltered, and the strict lower triangle contains the
|
|
* strict upper triangle (transposed) of the upper triangular matrix S.
|
|
*
|
|
* ldr is a positive integer INPUT variable not less than n which
|
|
* specifies the leading dimension of the array R.
|
|
*
|
|
* Pivot is an integer INPUT array of length n which defines the
|
|
* permutation matrix P such that A*P = Q*R. Column j of P is column
|
|
* Pivot(j) of the identity matrix.
|
|
*
|
|
* diag is an INPUT array of length n which must contain the diagonal
|
|
* elements of the matrix D.
|
|
*
|
|
* qtb is an INPUT array of length n which must contain the first
|
|
* n elements of the vector Q^T*b.
|
|
*
|
|
* x is an OUTPUT array of length n which contains the least-squares
|
|
* solution of the system A*x = b, D*x = 0.
|
|
*
|
|
* Sdiag is an OUTPUT array of length n which contains the diagonal
|
|
* elements of the upper triangular matrix S.
|
|
*
|
|
* W is a work array of length n.
|
|
*
|
|
*/
|
|
{
|
|
int i, kk, j, k, nsing;
|
|
double qtbpj, sum, temp;
|
|
double _sin, _cos, _tan, _cot; /* local variables, not functions */
|
|
|
|
/*** Copy R and Q^T*b to preserve input and initialize S.
|
|
In particular, save the diagonal elements of R in x. ***/
|
|
|
|
for (j = 0; j < n; j++) {
|
|
for (i = j; i < n; i++)
|
|
r[j*ldr+i] = r[i*ldr+j];
|
|
x[j] = r[j*ldr+j];
|
|
W[j] = qtb[j];
|
|
}
|
|
|
|
/*** Eliminate the diagonal matrix D using a Givens rotation. ***/
|
|
|
|
for (j = 0; j < n; j++) {
|
|
|
|
/*** Prepare the row of D to be eliminated, locating the diagonal
|
|
element using P from the QR factorization. ***/
|
|
|
|
if (diag[Pivot[j]] != 0) {
|
|
for (k = j; k < n; k++)
|
|
Sdiag[k] = 0;
|
|
Sdiag[j] = diag[Pivot[j]];
|
|
|
|
/*** The transformations to eliminate the row of D modify only
|
|
a single element of Q^T*b beyond the first n, which is
|
|
initially 0. ***/
|
|
|
|
qtbpj = 0;
|
|
for (k = j; k < n; k++) {
|
|
|
|
/** Determine a Givens rotation which eliminates the
|
|
appropriate element in the current row of D. **/
|
|
if (Sdiag[k] == 0)
|
|
continue;
|
|
kk = k + ldr * k;
|
|
if (fabs(r[kk]) < fabs(Sdiag[k])) {
|
|
_cot = r[kk] / Sdiag[k];
|
|
_sin = 1 / hypot(1, _cot);
|
|
_cos = _sin * _cot;
|
|
} else {
|
|
_tan = Sdiag[k] / r[kk];
|
|
_cos = 1 / hypot(1, _tan);
|
|
_sin = _cos * _tan;
|
|
}
|
|
|
|
/** Compute the modified diagonal element of R and
|
|
the modified element of (Q^T*b,0). **/
|
|
r[kk] = _cos * r[kk] + _sin * Sdiag[k];
|
|
temp = _cos * W[k] + _sin * qtbpj;
|
|
qtbpj = -_sin * W[k] + _cos * qtbpj;
|
|
W[k] = temp;
|
|
|
|
/** Accumulate the tranformation in the row of S. **/
|
|
for (i = k+1; i < n; i++) {
|
|
temp = _cos * r[k*ldr+i] + _sin * Sdiag[i];
|
|
Sdiag[i] = -_sin * r[k*ldr+i] + _cos * Sdiag[i];
|
|
r[k*ldr+i] = temp;
|
|
}
|
|
}
|
|
}
|
|
|
|
/** Store the diagonal element of S and restore
|
|
the corresponding diagonal element of R. **/
|
|
Sdiag[j] = r[j*ldr+j];
|
|
r[j*ldr+j] = x[j];
|
|
}
|
|
|
|
/*** Solve the triangular system for z. If the system is singular, then
|
|
obtain a least-squares solution. ***/
|
|
|
|
nsing = n;
|
|
for (j = 0; j < n; j++) {
|
|
if (Sdiag[j] == 0 && nsing == n)
|
|
nsing = j;
|
|
if (nsing < n)
|
|
W[j] = 0;
|
|
}
|
|
|
|
for (j = nsing-1; j >= 0; j--) {
|
|
sum = 0;
|
|
for (i = j+1; i < nsing; i++)
|
|
sum += r[j*ldr+i] * W[i];
|
|
W[j] = (W[j] - sum) / Sdiag[j];
|
|
}
|
|
|
|
/*** Permute the components of z back to components of x. ***/
|
|
|
|
for (j = 0; j < n; j++)
|
|
x[Pivot[j]] = W[j];
|
|
|
|
} /*** lm_qrsolv. ***/
|
|
|
|
/******************************************************************************/
|
|
/* lm_enorm (Euclidean norm) */
|
|
/******************************************************************************/
|
|
|
|
double lm_enorm(int n, const double* x)
|
|
/* This function calculates the Euclidean norm of an n-vector x.
|
|
*
|
|
* The Euclidean norm is computed by accumulating the sum of squares
|
|
* in three different sums. The sums of squares for the small and large
|
|
* components are scaled so that no overflows occur. Non-destructive
|
|
* underflows are permitted. Underflows and overflows do not occur in
|
|
* the computation of the unscaled sum of squares for the intermediate
|
|
* components. The definitions of small, intermediate and large components
|
|
* depend on two constants, LM_SQRT_DWARF and LM_SQRT_GIANT. The main
|
|
* restrictions on these constants are that LM_SQRT_DWARF**2 not underflow
|
|
* and LM_SQRT_GIANT**2 not overflow.
|
|
*
|
|
* Parameters:
|
|
*
|
|
* n is a positive integer INPUT variable.
|
|
*
|
|
* x is an INPUT array of length n.
|
|
*/
|
|
{
|
|
int i;
|
|
double agiant, s1, s2, s3, xabs, x1max, x3max;
|
|
|
|
s1 = 0;
|
|
s2 = 0;
|
|
s3 = 0;
|
|
x1max = 0;
|
|
x3max = 0;
|
|
agiant = LM_SQRT_GIANT / n;
|
|
|
|
/** Sum squares. **/
|
|
for (i = 0; i < n; i++) {
|
|
xabs = fabs(x[i]);
|
|
if (xabs > LM_SQRT_DWARF) {
|
|
if (xabs < agiant) {
|
|
s2 += SQR(xabs);
|
|
} else if (xabs > x1max) {
|
|
s1 = 1 + s1 * SQR(x1max / xabs);
|
|
x1max = xabs;
|
|
} else {
|
|
s1 += SQR(xabs / x1max);
|
|
}
|
|
} else if (xabs > x3max) {
|
|
s3 = 1 + s3 * SQR(x3max / xabs);
|
|
x3max = xabs;
|
|
} else if (xabs != 0) {
|
|
s3 += SQR(xabs / x3max);
|
|
}
|
|
}
|
|
|
|
/** Calculate the norm. **/
|
|
if (s1 != 0)
|
|
return x1max * sqrt(s1 + (s2 / x1max) / x1max);
|
|
else if (s2 != 0)
|
|
if (s2 >= x3max)
|
|
return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
|
|
else
|
|
return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
|
|
else
|
|
return x3max * sqrt(s3);
|
|
|
|
} /*** lm_enorm. ***/
|