RDKit
Open-source cheminformatics and machine learning.
Embedder.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2004-2017 Greg Landrum and Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_EMBEDDER_H_GUARD
13 #define RD_EMBEDDER_H_GUARD
14 
15 #include <map>
16 #include <Geometry/point.h>
17 #include <GraphMol/ROMol.h>
18 #include <boost/shared_ptr.hpp>
19 #include <DistGeom/BoundsMatrix.h>
20 
21 namespace RDKit {
22 namespace DGeomHelpers {
23 
24 //! Parameter object for controlling embedding
25 /*!
26  numConfs Number of conformations to be generated
27 
28  numThreads Sets the number of threads to use (more than one thread
29  will only be used if the RDKit was built with multithread
30  support) If set to zero, the max supported by the system will
31  be used.
32 
33  maxIterations Max. number of times the embedding will be tried if
34  coordinates are not obtained successfully. The default
35  value is 10x the number of atoms.
36 
37  randomSeed provides a seed for the random number generator (so that
38  the same coordinates can be obtained for a
39  molecule on multiple runs) If -1, the
40  RNG will not be seeded.
41 
42  clearConfs Clear all existing conformations on the molecule
43 
44  useRandomCoords Start the embedding from random coordinates instead of
45  using eigenvalues of the distance matrix.
46 
47  boxSizeMult Determines the size of the box that is used for
48  random coordinates. If this is a positive number, the
49  side length will equal the largest element of the distance
50  matrix times \c boxSizeMult. If this is a negative number,
51  the side length will equal \c -boxSizeMult (i.e. independent
52  of the elements of the distance matrix).
53 
54  randNegEig Picks coordinates at random when an embedding process produces
55  negative eigenvalues
56 
57  numZeroFail Fail embedding if we find this many or more zero eigenvalues
58  (within a tolerance)
59 
60  pruneRmsThresh Retain only the conformations out of 'numConfs' after
61  embedding that are at least this far apart from each other.
62  RMSD is computed on the heavy atoms.
63  Pruning is greedy; i.e. the first embedded conformation is
64  retained and from then on only those that are at least
65  \c pruneRmsThresh away from already
66  retained conformations are kept. The pruning is done
67  after embedding and bounds violation minimization.
68  No pruning by default.
69 
70  coordMap a map of int to Point3D, between atom IDs and
71  their locations. If this container is provided, the
72  coordinates are used to set distance constraints on the
73  embedding. The resulting conformer(s) should have distances
74  between the specified atoms that reproduce those between the
75  points in \c coordMap. Because the embedding produces a
76  molecule in an arbitrary reference frame, an alignment step
77  is required to actually reproduce the provided coordinates.
78 
79  optimizerForceTol set the tolerance on forces in the DGeom optimizer
80  (this shouldn't normally be altered in client code).
81 
82  ignoreSmoothingFailures try to embed the molecule even if triangle bounds
83  smoothing fails
84 
85  enforceChirality enforce the correct chirality if chiral centers are present
86 
87  useExpTorsionAnglePrefs impose experimental torsion-angle preferences
88 
89  useBasicKnowledge impose "basic knowledge" terms such as flat
90  aromatic rings, ketones, etc.
91 
92  ETversion version of the experimental torsion-angle preferences
93 
94  verbose print output of experimental torsion-angle preferences
95 
96  basinThresh set the basin threshold for the DGeom force field,
97  (this shouldn't normally be altered in client code).
98 
99  onlyHeavyAtomsForRMS only use the heavy atoms when doing RMS filtering
100 */
102  unsigned int maxIterations;  // data members follow; several of them (and the struct's opening line) are missing from this extracted listing
107  double boxSizeMult;
109  unsigned int numZeroFail;
110  const std::map<int, RDGeom::Point3D> *coordMap;  // raw pointer to a caller-provided map; NULL when unset
116  bool verbose;
117  double basinThresh;
120  unsigned int ETversion;
121  boost::shared_ptr<const DistGeom::BoundsMatrix> boundsMat;  // held through a shared_ptr; empty when unset
124  : maxIterations(0),  // initializer list of the default constructor (its "EmbedParameters()" line is missing from this listing)
125  numThreads(1),
126  randomSeed(-1),  // -1: per the parameter docs, the RNG is not seeded
127  clearConfs(true),
128  useRandomCoords(false),
129  boxSizeMult(2.0),
130  randNegEig(true),
131  numZeroFail(1),
132  coordMap(NULL),
133  optimizerForceTol(1e-3),
134  ignoreSmoothingFailures(false),
135  enforceChirality(true),
136  useExpTorsionAnglePrefs(false),
137  useBasicKnowledge(false),
138  verbose(false),
139  basinThresh(5.0),
140  pruneRmsThresh(-1.0),  // per the parameter docs, no pruning by default
141  onlyHeavyAtomsForRMS(false),
142  ETversion(1),
143  boundsMat(nullptr),
144  embedFragmentsSeparately(true){};
145  EmbedParameters(unsigned int maxIterations, int numThreads, int randomSeed,  // full constructor: one argument per member, in member order
146  bool clearConfs, bool useRandomCoords, double boxSizeMult,
147  bool randNegEig, unsigned int numZeroFail,
148  const std::map<int, RDGeom::Point3D> *coordMap,
149  double optimizerForceTol, bool ignoreSmoothingFailures,
150  bool enforceChirality, bool useExpTorsionAnglePrefs,
151  bool useBasicKnowledge, bool verbose, double basinThresh,
152  double pruneRmsThresh, bool onlyHeavyAtomsForRMS,
153  unsigned int ETversion = 1,
154  const DistGeom::BoundsMatrix *boundsMat = nullptr,  // NOTE(review): this raw pointer is used to construct the boost::shared_ptr member below, so a non-null pointer becomes owned by the parameter object; callers must not delete it themselves or pass the address of a stack object (confirm this is intended)
155  bool embedFragmentsSeparately = true)
156  : maxIterations(maxIterations),  // each member is initialized from the constructor argument of the same name
157  numThreads(numThreads),
158  randomSeed(randomSeed),
159  clearConfs(clearConfs),
160  useRandomCoords(useRandomCoords),
161  boxSizeMult(boxSizeMult),
162  randNegEig(randNegEig),
163  numZeroFail(numZeroFail),
164  coordMap(coordMap),
165  optimizerForceTol(optimizerForceTol),
166  ignoreSmoothingFailures(ignoreSmoothingFailures),
167  enforceChirality(enforceChirality),
168  useExpTorsionAnglePrefs(useExpTorsionAnglePrefs),
169  useBasicKnowledge(useBasicKnowledge),
170  verbose(verbose),
171  basinThresh(basinThresh),
172  pruneRmsThresh(pruneRmsThresh),
173  onlyHeavyAtomsForRMS(onlyHeavyAtomsForRMS),
174  ETversion(ETversion),
175  boundsMat(boundsMat),  // raw pointer -> shared_ptr (see the note on the parameter)
176  embedFragmentsSeparately(embedFragmentsSeparately){};
177 };
178 
179 //! Embed multiple conformations for a molecule; the ids of the new conformers are returned in \c res
181  ROMol &mol, INT_VECT &res, unsigned int numConfs,
182  const EmbedParameters &params);
183 inline INT_VECT EmbedMultipleConfs(ROMol &mol, unsigned int numConfs,
184  const EmbedParameters &params) {
185  INT_VECT confIds;  // filled in by the overload that takes the id vector by reference
186  EmbedMultipleConfs(mol, confIds, numConfs, params);
187  return confIds;  // hand the collected ids back by value
188 }
189 
190 //! Compute an embedding (in 3D) for the specified molecule using Distance Geometry
191 // Returns the id of the new conformer, or -1 if no conformer was generated.
192 inline int EmbedMolecule(ROMol &mol, const EmbedParameters &params) {
193  INT_VECT embedded;
194  EmbedMultipleConfs(mol, embedded, 1, params);  // request a single conformer
195 
196  // an empty id list means nothing was embedded
197  if (embedded.empty()) {
198  return -1;
199  }
200  return embedded[0];
201 }
204 
205 //! Compute an embedding (in 3D) for the specified molecule using Distance Geometry
206 //
207 /*!
208  The following operations are performed (in order) here:
209  -# Build a distance bounds matrix based on the topology, including 1-5
210  distances but not VDW scaling
211  -# Triangle smooth this bounds matrix
212  -# If step 2 fails - repeat step 1, this time without 1-5 bounds and with vdW
213  scaling, and repeat step 2
214  -# Pick a distance matrix at random using the bounds matrix
215  -# Compute initial coordinates from the distance matrix
216  -# Repeat steps 4 and 5 until maxIterations is reached or embedding is
217  successful
218  -# Adjust initial coordinates by minimizing a Distance Violation error
219  function
220 
221  **NOTE**: if the molecule has multiple fragments, they will be embedded
222  separately,
223  this means that they will likely occupy the same region of space.
224 
225  \param mol Molecule of interest
226  \param maxIterations Max. number of times the embedding will be tried if
227  coordinates are not obtained successfully. The default
228  value is 10x the number of atoms.
229  \param seed provides a seed for the random number generator (so that
230  the same coordinates can be obtained for a molecule on
231  multiple runs). If negative, the RNG will not be seeded.
232  \param clearConfs Clear all existing conformations on the molecule
233  \param useRandomCoords Start the embedding from random coordinates instead of
234  using eigenvalues of the distance matrix.
235  \param boxSizeMult Determines the size of the box that is used for
236  random coordinates. If this is a positive number, the
237  side length will equal the largest element of the
238  distance matrix times \c boxSizeMult. If this is a
239  negative number, the side length will equal
240  \c -boxSizeMult (i.e. independent of the elements of the
241  distance matrix).
242  \param randNegEig Picks coordinates at random when an embedding process
243  produces negative eigenvalues
244  \param numZeroFail Fail embedding if we find this many or more zero
245  eigenvalues (within a tolerance)
246  \param coordMap a map of int to Point3D, between atom IDs and
247  their locations. If this container is provided, the
248  coordinates are used to set distance constraints on the
249  embedding. The resulting conformer(s) should have distances
250  between the specified atoms that reproduce those between the
251  points in \c coordMap. Because the embedding produces a
252  molecule in an arbitrary reference frame, an alignment step
253  is required to actually reproduce the provided coordinates.
254  \param optimizerForceTol set the tolerance on forces in the distgeom optimizer
255  (this shouldn't normally be altered in client code).
256  \param ignoreSmoothingFailures try to embed the molecule even if triangle
257  bounds smoothing fails
258  \param enforceChirality enforce the correct chirality if chiral centers are
259  present
260  \param useExpTorsionAnglePrefs impose experimental torsion-angle preferences
261  \param useBasicKnowledge impose "basic knowledge" terms such as flat
262  aromatic rings, ketones, etc.
263  \param verbose print output of experimental torsion-angle preferences
264 
265  \param basinThresh set the basin threshold for the DGeom force field,
266  (this shouldn't normally be altered in client code).
267 
268  \param onlyHeavyAtomsForRMS only use the heavy atoms when doing RMS filtering
269 
270  \return ID of the conformation added to the molecule, -1 if the embedding
271  failed
272 */
273 inline int EmbedMolecule(ROMol &mol, unsigned int maxIterations = 0,
274  int seed = -1, bool clearConfs = true,
275  bool useRandomCoords = false, double boxSizeMult = 2.0,
276  bool randNegEig = true, unsigned int numZeroFail = 1,
277  const std::map<int, RDGeom::Point3D> *coordMap = nullptr,
278  double optimizerForceTol = 1e-3,
279  bool ignoreSmoothingFailures = false,
280  bool enforceChirality = true,
281  bool useExpTorsionAnglePrefs = false,
282  bool useBasicKnowledge = false, bool verbose = false,
283  double basinThresh = 5.0,
284  bool onlyHeavyAtomsForRMS = false) {
285  EmbedParameters params(  // arguments are positional: the order must match the EmbedParameters constructor
286  maxIterations, 1 /* numThreads */, seed, clearConfs, useRandomCoords, boxSizeMult,
287  randNegEig, numZeroFail, coordMap, optimizerForceTol,
288  ignoreSmoothingFailures, enforceChirality, useExpTorsionAnglePrefs,
289  useBasicKnowledge, verbose, basinThresh, -1.0 /* pruneRmsThresh */, onlyHeavyAtomsForRMS);
290  return EmbedMolecule(mol, params);  // ETversion, boundsMat and embedFragmentsSeparately keep the constructor defaults
291 }
292 
293 //! Embed multiple conformations for a molecule
294 /*!
295  This is kind of equivalent to calling EmbedMolecule multiple times - just that
296  the bounds
297  matrix is computed only once from the topology
298 
299  **NOTE**: if the molecule has multiple fragments, they will be embedded
300  separately,
301  this means that they will likely occupy the same region of space.
302 
303 
304  \param mol Molecule of interest
305  \param res Used to return the resulting conformer ids
306  \param numConfs Number of conformations to be generated
307  \param numThreads Sets the number of threads to use (more than one thread
308  will only be used if the RDKit was built with
309  multithread
310  support). If set to zero, the max supported by the
311  system
312  will be used.
313  \param maxIterations Max. number of times the embedding will be tried if
314  coordinates are not obtained successfully. The default
315  value is 10x the number of atoms.
316  \param seed provides a seed for the random number generator (so that
317  the same coordinates can be obtained for a molecule on
318  multiple runs). If negative, the RNG will not be seeded.
319  \param clearConfs Clear all existing conformations on the molecule
320  \param useRandomCoords Start the embedding from random coordinates instead of
321  using eigenvalues of the distance matrix.
322  \param boxSizeMult Determines the size of the box that is used for
323  random coordinates. If this is a positive number, the
324  side length will equal the largest element of the
325  distance matrix times \c boxSizeMult. If this is a
326  negative number, the side length will equal
327  \c -boxSizeMult (i.e. independent of the elements of the
328  distance matrix).
329  \param randNegEig Picks coordinates at random when an embedding process
330  produces negative eigenvalues
331  \param numZeroFail Fail embedding if we find this many or more zero
332  eigenvalues (within a tolerance)
333  \param pruneRmsThresh Retain only the conformations out of 'numConfs' after
334  embedding that are at least this far apart from each
335  other. RMSD is computed on the heavy atoms.
336  Pruning is greedy; i.e. the first embedded conformation
337  is retained and from then on only those that are at
338  least
339  pruneRmsThresh away from already retained conformations
340  are kept. The pruning is done after embedding and
341  bounds violation minimization. No pruning by default.
342  \param coordMap a map of int to Point3D, between atom IDs and
343  their locations. If this container is provided, the
344  coordinates are used to set distance constraints on the
345  embedding. The resulting conformer(s) should have distances
346  between the specified atoms that reproduce those between the
347  points in \c coordMap. Because the embedding produces a
348  molecule in an arbitrary reference frame, an alignment step
349  is required to actually reproduce the provided coordinates.
350  \param optimizerForceTol set the tolerance on forces in the DGeom optimizer
351  (this shouldn't normally be altered in client code).
352  \param ignoreSmoothingFailures try to embed the molecule even if triangle
353  bounds smoothing fails
354  \param enforceChirality enforce the correct chirality if chiral centers are
355  present
356  \param useExpTorsionAnglePrefs impose experimental torsion-angle preferences
357  \param useBasicKnowledge impose "basic knowledge" terms such as flat
358  aromatic rings, ketones, etc.
359  \param verbose print output of experimental torsion-angle preferences
360  \param basinThresh set the basin threshold for the DGeom force field,
361  (this shouldn't normally be altered in client code).
362  \param onlyHeavyAtomsForRMS only use the heavy atoms when doing RMS filtering
363 
364 */
365 inline void EmbedMultipleConfs(
366  ROMol &mol, INT_VECT &res, unsigned int numConfs = 10, int numThreads = 1,
367  unsigned int maxIterations = 30, int seed = -1, bool clearConfs = true,  // NOTE(review): the default here is 30, while the doc comment says "10x the number of atoms" - confirm which is intended
368  bool useRandomCoords = false, double boxSizeMult = 2.0,
369  bool randNegEig = true, unsigned int numZeroFail = 1,
370  double pruneRmsThresh = -1.0,
371  const std::map<int, RDGeom::Point3D> *coordMap = nullptr,
372  double optimizerForceTol = 1e-3, bool ignoreSmoothingFailures = false,
373  bool enforceChirality = true, bool useExpTorsionAnglePrefs = false,
374  bool useBasicKnowledge = false, bool verbose = false,
375  double basinThresh = 5.0, bool onlyHeavyAtomsForRMS = false) {
376  EmbedParameters params(maxIterations, numThreads, seed, clearConfs,  // arguments are positional: the order must match the EmbedParameters constructor
377  useRandomCoords, boxSizeMult, randNegEig, numZeroFail,
378  coordMap, optimizerForceTol, ignoreSmoothingFailures,
379  enforceChirality, useExpTorsionAnglePrefs,
380  useBasicKnowledge, verbose, basinThresh,
381  pruneRmsThresh, onlyHeavyAtomsForRMS);  // ETversion, boundsMat and embedFragmentsSeparately keep the constructor defaults
382  EmbedMultipleConfs(mol, res, numConfs, params);  // delegate to the EmbedParameters overload; ids come back in res
383 }
384 //! \overload
386  ROMol &mol, unsigned int numConfs = 10, unsigned int maxIterations = 30,  // the line opening this definition (listing line 385) is missing from this extracted listing
387  int seed = -1, bool clearConfs = true, bool useRandomCoords = false,
388  double boxSizeMult = 2.0, bool randNegEig = true,
389  unsigned int numZeroFail = 1, double pruneRmsThresh = -1.0,
390  const std::map<int, RDGeom::Point3D> *coordMap = 0,
391  double optimizerForceTol = 1e-3, bool ignoreSmoothingFailures = false,
392  bool enforceChirality = true, bool useExpTorsionAnglePrefs = false,
393  bool useBasicKnowledge = false, bool verbose = false,
394  double basinThresh = 5.0, bool onlyHeavyAtomsForRMS = false) {
395  EmbedParameters params(maxIterations, 1, seed, clearConfs, useRandomCoords,  // the literal 1 fills the constructor's numThreads slot: this overload has no numThreads argument
396  boxSizeMult, randNegEig, numZeroFail, coordMap,
397  optimizerForceTol, ignoreSmoothingFailures,
398  enforceChirality, useExpTorsionAnglePrefs,
399  useBasicKnowledge, verbose, basinThresh,
400  pruneRmsThresh, onlyHeavyAtomsForRMS);
401  INT_VECT res;  // collects the conformer ids written by the call below
402  EmbedMultipleConfs(mol, res, numConfs, params);
403  return res;  // ids are returned by value instead of through an out-parameter
404 };
405 
406 //! Predefined parameter set corresponding to Sereina Riniker's KDG approach (declared here, defined elsewhere)
407 RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters KDG;
408 //! Predefined parameter set corresponding to Sereina Riniker's ETDG approach (declared here, defined elsewhere)
409 RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters ETDG;
410 //! Predefined parameter set corresponding to Sereina Riniker's ETKDG approach (declared here, defined elsewhere)
411 RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters ETKDG;
412 //! Predefined parameter set corresponding to Sereina Riniker's ETKDG approach - version 2 (declared here, defined elsewhere)
413 RDKIT_DISTGEOMHELPERS_EXPORT extern const EmbedParameters ETKDGv2;
414 } // namespace DGeomHelpers
415 } // namespace RDKit
416 
417 #endif
RDKit::DGeomHelpers::EmbedParameters::useExpTorsionAnglePrefs
bool useExpTorsionAnglePrefs
Definition: Embedder.h:114
BoundsMatrix.h
RDKit::DGeomHelpers::EmbedParameters::pruneRmsThresh
double pruneRmsThresh
Definition: Embedder.h:118
RDKit::DGeomHelpers::EmbedParameters::useBasicKnowledge
bool useBasicKnowledge
Definition: Embedder.h:115
point.h
ROMol.h
Defines the primary molecule class ROMol as well as associated typedefs.
RDKit::INT_VECT
std::vector< int > INT_VECT
Definition: types.h:254
RDKit::DGeomHelpers::EmbedParameters::enforceChirality
bool enforceChirality
Definition: Embedder.h:113
RDKit::DGeomHelpers::EmbedParameters::onlyHeavyAtomsForRMS
bool onlyHeavyAtomsForRMS
Definition: Embedder.h:119
RDKit::DGeomHelpers::EmbedParameters::clearConfs
bool clearConfs
Definition: Embedder.h:105
RDKit::DGeomHelpers::EmbedParameters::randNegEig
bool randNegEig
Definition: Embedder.h:108
RDKit::DGeomHelpers::ETDG
const RDKIT_DISTGEOMHELPERS_EXPORT EmbedParameters ETDG
Parameters corresponding to Sereina Riniker's ETDG approach.
RDKit::DGeomHelpers::EmbedParameters::coordMap
const std::map< int, RDGeom::Point3D > * coordMap
Definition: Embedder.h:110
RDKit::DGeomHelpers::EmbedMultipleConfs
RDKIT_DISTGEOMHELPERS_EXPORT void EmbedMultipleConfs(ROMol &mol, INT_VECT &res, unsigned int numConfs, const EmbedParameters &params)
RDKit::DGeomHelpers::EmbedParameters::ignoreSmoothingFailures
bool ignoreSmoothingFailures
Definition: Embedder.h:112
RDKIT_DISTGEOMHELPERS_EXPORT
#define RDKIT_DISTGEOMHELPERS_EXPORT
Definition: export.h:151
RDKit::DGeomHelpers::EmbedParameters::ETversion
unsigned int ETversion
Definition: Embedder.h:120
RDKit::ROMol
Definition: ROMol.h:171
RDKit::DGeomHelpers::EmbedParameters::verbose
bool verbose
Definition: Embedder.h:116
RDKit::DGeomHelpers::EmbedParameters::boxSizeMult
double boxSizeMult
Definition: Embedder.h:107
RDKit::DGeomHelpers::EmbedParameters::embedFragmentsSeparately
bool embedFragmentsSeparately
Definition: Embedder.h:122
RDKit::DGeomHelpers::EmbedParameters::boundsMat
boost::shared_ptr< const DistGeom::BoundsMatrix > boundsMat
Definition: Embedder.h:121
RDKit::DGeomHelpers::EmbedParameters::numThreads
int numThreads
Definition: Embedder.h:103
RDKit::DGeomHelpers::EmbedParameters::EmbedParameters
EmbedParameters()
Definition: Embedder.h:123
RDKit::DGeomHelpers::EmbedParameters::optimizerForceTol
double optimizerForceTol
Definition: Embedder.h:111
RDKit::DGeomHelpers::EmbedParameters::maxIterations
unsigned int maxIterations
Definition: Embedder.h:102
RDKit::DGeomHelpers::EmbedParameters::useRandomCoords
bool useRandomCoords
Definition: Embedder.h:106
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::DGeomHelpers::KDG
const RDKIT_DISTGEOMHELPERS_EXPORT EmbedParameters KDG
Parameters corresponding to Sereina Riniker's KDG approach.
RDKit::DGeomHelpers::EmbedMolecule
int EmbedMolecule(ROMol &mol, const EmbedParameters &params)
Compute an embedding (in 3D) for the specified molecule using Distance.
Definition: Embedder.h:192
RDKit::DGeomHelpers::ETKDGv2
const RDKIT_DISTGEOMHELPERS_EXPORT EmbedParameters ETKDGv2
Parameters corresponding to Sereina Riniker's ETKDG approach - version 2.
RDKit::DGeomHelpers::EmbedParameters::randomSeed
int randomSeed
Definition: Embedder.h:104
RDKit::DGeomHelpers::EmbedParameters::numZeroFail
unsigned int numZeroFail
Definition: Embedder.h:109
RDKit::DGeomHelpers::EmbedParameters::basinThresh
double basinThresh
Definition: Embedder.h:117
RDKit::DGeomHelpers::EmbedParameters
Parameter object for controlling embedding.
Definition: Embedder.h:101
RDKit::DGeomHelpers::EmbedParameters::EmbedParameters
EmbedParameters(unsigned int maxIterations, int numThreads, int randomSeed, bool clearConfs, bool useRandomCoords, double boxSizeMult, bool randNegEig, unsigned int numZeroFail, const std::map< int, RDGeom::Point3D > *coordMap, double optimizerForceTol, bool ignoreSmoothingFailures, bool enforceChirality, bool useExpTorsionAnglePrefs, bool useBasicKnowledge, bool verbose, double basinThresh, double pruneRmsThresh, bool onlyHeavyAtomsForRMS, unsigned int ETversion=1, const DistGeom::BoundsMatrix *boundsMat=nullptr, bool embedFragmentsSeparately=true)
Definition: Embedder.h:145
RDKit::DGeomHelpers::ETKDG
const RDKIT_DISTGEOMHELPERS_EXPORT EmbedParameters ETKDG
Parameters corresponding to Sereina Riniker's ETKDG approach.
DistGeom::BoundsMatrix
Class to store the distance bound.
Definition: BoundsMatrix.h:27
export.h