RDKit
Open-source cheminformatics and machine learning.
BFGSOpt.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #include <math.h>
12 #include <RDGeneral/Invariant.h>
14 #include <cstring>
15 #include <vector>
16 #include <algorithm>
17 
18 namespace BFGSOpt {
21 const double FUNCTOL =
22  1e-4; //!< Default tolerance for function convergence in the minimizer
23 const double MOVETOL =
24  1e-7; //!< Default tolerance for x changes in the minimizer
25 const int MAXITS = 200; //!< Default maximum number of iterations
26 const double EPS = 3e-8; //!< Default gradient tolerance in the minimizer
27 const double TOLX =
28  4. * EPS; //!< Default direction vector tolerance in the minimizer
29 const double MAXSTEP = 100.0; //!< Default maximim step size in the minimizer
30 
31 //! Do a Quasi-Newton minimization along a line.
32 /*!
33  See Numerical Recipes in C, Section 9.7 for a description of the algorithm.
34 
35  \param dim the dimensionality of the space.
36  \param oldPt the current position, as an array.
37  \param oldVal the current function value.
38  \param grad the value of the function gradient at oldPt
39  \param dir the minimization direction
40  \param newPt used to return the final position
41  \param newVal used to return the final function value
42  \param func the function to minimize
43  \param maxStep the maximum allowable step size
44  \param resCode used to return the results of the search.
45 
46  Possible values for resCode are on return are:
47  - 0: success
48  - 1: the stepsize got too small. This probably indicates success.
49  - -1: the direction is bad (orthogonal to the gradient)
50 */
51 template <typename EnergyFunctor>
52 void linearSearch(unsigned int dim, double *oldPt, double oldVal, double *grad,
53  double *dir, double *newPt, double &newVal,
54  EnergyFunctor func, double maxStep, int &resCode) {
55  PRECONDITION(oldPt, "bad input array");
56  PRECONDITION(grad, "bad input array");
57  PRECONDITION(dir, "bad input array");
58  PRECONDITION(newPt, "bad input array");
59 
60  const unsigned int MAX_ITER_LINEAR_SEARCH = 1000;
61  double sum = 0.0, slope = 0.0, test = 0.0, lambda = 0.0;
62  double lambda2 = 0.0, lambdaMin = 0.0, tmpLambda = 0.0, val2 = 0.0;
63 
64  resCode = -1;
65 
66  // get the length of the direction vector:
67  sum = 0.0;
68  for (unsigned int i = 0; i < dim; i++) sum += dir[i] * dir[i];
69  sum = sqrt(sum);
70 
71  // rescale if we're trying to move too far:
72  if (sum > maxStep) {
73  for (unsigned int i = 0; i < dim; i++) dir[i] *= maxStep / sum;
74  }
75 
76  // make sure our direction has at least some component along
77  // -grad
78  slope = 0.0;
79  for (unsigned int i = 0; i < dim; i++) {
80  slope += dir[i] * grad[i];
81  }
82  if (slope >= 0.0) {
83  return;
84  }
85 
86  test = 0.0;
87  for (unsigned int i = 0; i < dim; i++) {
88  double temp = fabs(dir[i]) / std::max(fabs(oldPt[i]), 1.0);
89  if (temp > test) test = temp;
90  }
91 
92  lambdaMin = MOVETOL / test;
93  lambda = 1.0;
94  unsigned int it = 0;
95  while (it < MAX_ITER_LINEAR_SEARCH) {
96  // std::cerr << "\t" << it<<" : "<<lambda << " " << lambdaMin << std::endl;
97  if (lambda < lambdaMin) {
98  // the position change is too small
99  resCode = 1;
100  break;
101  }
102  for (unsigned int i = 0; i < dim; i++) {
103  newPt[i] = oldPt[i] + lambda * dir[i];
104  }
105  newVal = func(newPt);
106 
107  if (newVal - oldVal <= FUNCTOL * lambda * slope) {
108  // we're converged on the function:
109  resCode = 0;
110  return;
111  }
112  // if we made it this far, we need to backtrack:
113  if (it == 0) {
114  // it's the first step:
115  tmpLambda = -slope / (2.0 * (newVal - oldVal - slope));
116  } else {
117  double rhs1 = newVal - oldVal - lambda * slope;
118  double rhs2 = val2 - oldVal - lambda2 * slope;
119  double a = (rhs1 / (lambda * lambda) - rhs2 / (lambda2 * lambda2)) /
120  (lambda - lambda2);
121  double b = (-lambda2 * rhs1 / (lambda * lambda) +
122  lambda * rhs2 / (lambda2 * lambda2)) /
123  (lambda - lambda2);
124  if (a == 0.0) {
125  tmpLambda = -slope / (2.0 * b);
126  } else {
127  double disc = b * b - 3 * a * slope;
128  if (disc < 0.0) {
129  tmpLambda = 0.5 * lambda;
130  } else if (b <= 0.0) {
131  tmpLambda = (-b + sqrt(disc)) / (3.0 * a);
132  } else {
133  tmpLambda = -slope / (b + sqrt(disc));
134  }
135  }
136  if (tmpLambda > 0.5 * lambda) {
137  tmpLambda = 0.5 * lambda;
138  }
139  }
140  lambda2 = lambda;
141  val2 = newVal;
142  lambda = std::max(tmpLambda, 0.1 * lambda);
143  ++it;
144  }
145  // nothing was done
146  // std::cerr<<" RETURN AT END: "<<it<<" "<<resCode<<std::endl;
147  for (unsigned int i = 0; i < dim; i++) {
148  newPt[i] = oldPt[i];
149  }
150 }
151 
152 #define CLEANUP() \
153  { \
154  delete[] grad; \
155  delete[] dGrad; \
156  delete[] hessDGrad; \
157  delete[] newPos; \
158  delete[] xi; \
159  delete[] invHessian; \
160  }
161 //! Do a BFGS minimization of a function.
162 /*!
163  See Numerical Recipes in C, Section 10.7 for a description of the algorithm.
164 
165  \param dim the dimensionality of the space.
166  \param pos the starting position, as an array.
167  \param gradTol tolerance for gradient convergence
168  \param numIters used to return the number of iterations required
169  \param funcVal used to return the final function value
170  \param func the function to minimize
171  \param gradFunc calculates the gradient of func
172  \param funcTol tolerance for changes in the function value for convergence.
173  \param maxIts maximum number of iterations allowed
174  \param snapshotFreq a snapshot of the minimization trajectory
175  will be stored after as many steps as indicated
176  through this parameter; defaults to 0 (no
177  snapshots stored)
178  \param snapshotVect pointer to a std::vector<Snapshot> object that will
179  receive the coordinates and energies every snapshotFreq steps; defaults to
180  NULL (no snapshots stored)
181 
182  \return a flag indicating success (or type of failure). Possible values are:
183  - 0: success
184  - 1: too many iterations were required
185 */
186 template <typename EnergyFunctor, typename GradientFunctor>
187 int minimize(unsigned int dim, double *pos, double gradTol,
188  unsigned int &numIters, double &funcVal, EnergyFunctor func,
189  GradientFunctor gradFunc, unsigned int snapshotFreq,
190  RDKit::SnapshotVect *snapshotVect, double funcTol = TOLX,
191  unsigned int maxIts = MAXITS) {
192  RDUNUSED_PARAM(funcTol);
193  PRECONDITION(pos, "bad input array");
194  PRECONDITION(gradTol > 0, "bad tolerance");
195 
196  double sum, maxStep, fp;
197 
198  double *grad, *dGrad, *hessDGrad;
199  double *newPos, *xi;
200  double *invHessian;
201 
202  grad = new double[dim];
203  dGrad = new double[dim];
204  hessDGrad = new double[dim];
205  newPos = new double[dim];
206  xi = new double[dim];
207  invHessian = new double[dim * dim];
208  snapshotFreq = std::min(snapshotFreq, maxIts);
209 
210  // evaluate the function and gradient in our current position:
211  fp = func(pos);
212  gradFunc(pos, grad);
213 
214  sum = 0.0;
215  memset(invHessian, 0, dim * dim * sizeof(double));
216  for (unsigned int i = 0; i < dim; i++) {
217  unsigned int itab = i * dim;
218  // initialize the inverse hessian to be identity:
219  invHessian[itab + i] = 1.0;
220  // the first line dir is -grad:
221  xi[i] = -grad[i];
222  sum += pos[i] * pos[i];
223  }
224  // pick a max step size:
225  maxStep = MAXSTEP * std::max(sqrt(sum), static_cast<double>(dim));
226 
227  for (unsigned int iter = 1; iter <= maxIts; iter++) {
228  numIters = iter;
229  int status;
230 
231  // do the line search:
232  linearSearch(dim, pos, fp, grad, xi, newPos, funcVal, func, maxStep,
233  status);
234  CHECK_INVARIANT(status >= 0, "bad direction in linearSearch");
235 
236  // save the function value for the next search:
237  fp = funcVal;
238 
239  // set the direction of this line and save the gradient:
240  double test = 0.0;
241  for (unsigned int i = 0; i < dim; i++) {
242  xi[i] = newPos[i] - pos[i];
243  pos[i] = newPos[i];
244  double temp = fabs(xi[i]) / std::max(fabs(pos[i]), 1.0);
245  if (temp > test) test = temp;
246  dGrad[i] = grad[i];
247  }
248  // std::cerr<<" iter: "<<iter<<" "<<fp<<" "<<test<<"
249  // "<<TOLX<<std::endl;
250  if (test < TOLX) {
251  if (snapshotVect && snapshotFreq) {
252  RDKit::Snapshot s(boost::shared_array<double>(newPos), fp);
253  snapshotVect->push_back(s);
254  newPos = NULL;
255  }
256  CLEANUP();
257  return 0;
258  }
259 
260  // update the gradient:
261  double gradScale = gradFunc(pos, grad);
262 
263  // is the gradient converged?
264  test = 0.0;
265  double term = std::max(funcVal * gradScale, 1.0);
266  for (unsigned int i = 0; i < dim; i++) {
267  double temp = fabs(grad[i]) * std::max(fabs(pos[i]), 1.0);
268  test = std::max(test, temp);
269  dGrad[i] = grad[i] - dGrad[i];
270  }
271  test /= term;
272  // std::cerr<<" "<<gradScale<<" "<<test<<"
273  // "<<gradTol<<std::endl;
274  if (test < gradTol) {
275  if (snapshotVect && snapshotFreq) {
276  RDKit::Snapshot s(boost::shared_array<double>(newPos), fp);
277  snapshotVect->push_back(s);
278  newPos = NULL;
279  }
280  CLEANUP();
281  return 0;
282  }
283 
284  // for(unsigned int i=0;i<dim;i++){
285  // figure out how much the gradient changed:
286  //}
287 
288  // compute hessian*dGrad:
289  double fac = 0, fae = 0, sumDGrad = 0, sumXi = 0;
290  for (unsigned int i = 0; i < dim; i++) {
291 #if 0
292  unsigned int itab = i * dim;
293  hessDGrad[i] = 0.0;
294  for (unsigned int j = 0; j < dim; j++) {
295  hessDGrad[i] += invHessian[itab + j] * dGrad[j];
296  }
297 
298 #else
299  double *ivh = &(invHessian[i * dim]);
300  double &hdgradi = hessDGrad[i];
301  double *dgj = dGrad;
302  hdgradi = 0.0;
303  for (unsigned int j = 0; j < dim; ++j, ++ivh, ++dgj) {
304  hdgradi += *ivh * *dgj;
305  }
306 #endif
307  fac += dGrad[i] * xi[i];
308  fae += dGrad[i] * hessDGrad[i];
309  sumDGrad += dGrad[i] * dGrad[i];
310  sumXi += xi[i] * xi[i];
311  }
312  if (fac > sqrt(EPS * sumDGrad * sumXi)) {
313  fac = 1.0 / fac;
314  double fad = 1.0 / fae;
315  for (unsigned int i = 0; i < dim; i++) {
316  dGrad[i] = fac * xi[i] - fad * hessDGrad[i];
317  }
318 
319  for (unsigned int i = 0; i < dim; i++) {
320  unsigned int itab = i * dim;
321 #if 0
322  for (unsigned int j = i; j < dim; j++) {
323  invHessian[itab + j] += fac * xi[i] * xi[j] -
324  fad * hessDGrad[i] * hessDGrad[j] +
325  fae * dGrad[i] * dGrad[j];
326  invHessian[j * dim + i] = invHessian[itab + j];
327  }
328 #else
329  double pxi = fac * xi[i], hdgi = fad * hessDGrad[i],
330  dgi = fae * dGrad[i];
331  double *pxj = &(xi[i]), *hdgj = &(hessDGrad[i]), *dgj = &(dGrad[i]);
332  for (unsigned int j = i; j < dim; ++j, ++pxj, ++hdgj, ++dgj) {
333  invHessian[itab + j] += pxi * *pxj - hdgi * *hdgj + dgi * *dgj;
334  invHessian[j * dim + i] = invHessian[itab + j];
335  }
336 #endif
337  }
338  }
339  // generate the next direction to move:
340  for (unsigned int i = 0; i < dim; i++) {
341  unsigned int itab = i * dim;
342  xi[i] = 0.0;
343 #if 0
344  for (unsigned int j = 0; j < dim; j++) {
345  xi[i] -= invHessian[itab + j] * grad[j];
346  }
347 #else
348  double &pxi = xi[i], *ivh = &(invHessian[itab]), *gj = grad;
349  for (unsigned int j = 0; j < dim; ++j, ++ivh, ++gj) {
350  pxi -= *ivh * *gj;
351  }
352 #endif
353  }
354  if (snapshotVect && snapshotFreq && !(iter % snapshotFreq)) {
355  RDKit::Snapshot s(boost::shared_array<double>(newPos), fp);
356  snapshotVect->push_back(s);
357  newPos = new double[dim];
358  }
359  }
360  CLEANUP();
361  return 1;
362 }
363 
364 //! Do a BFGS minimization of a function.
365 /*!
366  \param dim the dimensionality of the space.
367  \param pos the starting position, as an array.
368  \param gradTol tolerance for gradient convergence
369  \param numIters used to return the number of iterations required
370  \param funcVal used to return the final function value
371  \param func the function to minimize
372  \param gradFunc calculates the gradient of func
373  \param funcTol tolerance for changes in the function value for convergence.
374  \param maxIts maximum number of iterations allowed
375 
376  \return a flag indicating success (or type of failure). Possible values are:
377  - 0: success
378  - 1: too many iterations were required
379 */
380 template <typename EnergyFunctor, typename GradientFunctor>
381 int minimize(unsigned int dim, double *pos, double gradTol,
382  unsigned int &numIters, double &funcVal, EnergyFunctor func,
383  GradientFunctor gradFunc, double funcTol = TOLX,
384  unsigned int maxIts = MAXITS) {
385  return minimize(dim, pos, gradTol, numIters, funcVal, func, gradFunc, 0, NULL,
386  funcTol, maxIts);
387 }
388 
389 } // namespace BFGSOpt
BFGSOpt::FUNCTOL
const double FUNCTOL
Default tolerance for function convergence in the minimizer.
Definition: BFGSOpt.h:21
CLEANUP
#define CLEANUP()
Definition: BFGSOpt.h:152
CHECK_INVARIANT
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:101
Snapshot.h
BFGSOpt::HEAD_ONLY_LIBRARY
RDKIT_OPTIMIZER_EXPORT int HEAD_ONLY_LIBRARY
RDUNUSED_PARAM
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:196
BFGSOpt::MOVETOL
const double MOVETOL
Default tolerance for x changes in the minimizer.
Definition: BFGSOpt.h:23
Invariant.h
BFGSOpt::REALLY_A_HEADER_ONLY_LIBRARY
RDKIT_OPTIMIZER_EXPORT int REALLY_A_HEADER_ONLY_LIBRARY
RDKit::SnapshotVect
std::vector< Snapshot > SnapshotVect
Definition: Snapshot.h:19
BFGSOpt::EPS
const double EPS
Default gradient tolerance in the minimizer.
Definition: BFGSOpt.h:26
BFGSOpt::minimize
int minimize(unsigned int dim, double *pos, double gradTol, unsigned int &numIters, double &funcVal, EnergyFunctor func, GradientFunctor gradFunc, unsigned int snapshotFreq, RDKit::SnapshotVect *snapshotVect, double funcTol=TOLX, unsigned int maxIts=MAXITS)
Do a BFGS minimization of a function.
Definition: BFGSOpt.h:187
BFGSOpt
Definition: BFGSOpt.h:18
RDKIT_OPTIMIZER_EXPORT
#define RDKIT_OPTIMIZER_EXPORT
Definition: export.h:463
RDKit::Snapshot
Definition: Snapshot.h:25
ForceFields::UFF::Params::lambda
const double lambda
scaling factor for rBO correction
Definition: UFF/Params.h:89
BFGSOpt::linearSearch
void linearSearch(unsigned int dim, double *oldPt, double oldVal, double *grad, double *dir, double *newPt, double &newVal, EnergyFunctor func, double maxStep, int &resCode)
Do a Quasi-Newton minimization along a line.
Definition: BFGSOpt.h:52
BFGSOpt::TOLX
const double TOLX
Default direction vector tolerance in the minimizer.
Definition: BFGSOpt.h:27
PRECONDITION
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
BFGSOpt::MAXSTEP
const double MAXSTEP
Default maximim step size in the minimizer.
Definition: BFGSOpt.h:29
BFGSOpt::MAXITS
const int MAXITS
Default maximum number of iterations.
Definition: BFGSOpt.h:25
export.h