RDKit
Open-source cheminformatics and machine learning.
SubstructLibrary.h
Go to the documentation of this file.
1 // Copyright (c) 2017-2019, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 #ifndef RDK_SUBSTRUCT_LIBRARY
32 #define RDK_SUBSTRUCT_LIBRARY
33 #include <RDGeneral/export.h>
34 #include <GraphMol/RDKitBase.h>
35 #include <GraphMol/MolPickler.h>
40 #include <DataStructs/BitOps.h>
41 
42 namespace RDKit {
43 
45 
46 //! Base class API for holding molecules to substructure search.
47 /*!
48  This is an API that hides the implementation details used for
49  indexing molecules for substructure searching. It simply
50  provides an API for adding and getting molecules from a set.
51  */
53  public:
54  virtual ~MolHolderBase() {}
55 
56  //! Add a new molecule to the substructure search library
57  //! Returns the molecule's index in the library
58  virtual unsigned int addMol(const ROMol &m) = 0;
59 
60  // implementations should throw IndexError on out of range
61  virtual boost::shared_ptr<ROMol> getMol(unsigned int) const = 0;
62 
63  //! Get the current library size
64  virtual unsigned int size() const = 0;
65 };
66 
67 //! Concrete class that holds molecules in memory
68 /*!
69  This is currently one of the faster implementations.
70  However it is very memory intensive.
71 */
73  std::vector<boost::shared_ptr<ROMol>> mols;
74 
75  public:
76  MolHolder() : MolHolderBase(), mols() {}
77 
78  virtual unsigned int addMol(const ROMol &m) {
79  mols.push_back(boost::make_shared<ROMol>(m));
80  return size() - 1;
81  }
82 
83  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
84  if (idx >= mols.size()) throw IndexErrorException(idx);
85  return mols[idx];
86  }
87 
88  virtual unsigned int size() const {
89  return rdcast<unsigned int>(mols.size());
90  }
91 
92  std::vector<boost::shared_ptr<ROMol>> &getMols() { return mols; }
93  const std::vector<boost::shared_ptr<ROMol>> &getMols() const { return mols; }
94 };
95 
96 //! Concrete class that holds binary cached molecules in memory
97 /*!
98  This implementation uses quite a bit less memory than the
99  non cached implementation. However, due to the reduced speed
100  it should be used in conjunction with a pattern fingerprinter.
101 
102  See RDKit::FPHolder
103 */
105  std::vector<std::string> mols;
106 
107  public:
109 
110  virtual unsigned int addMol(const ROMol &m) {
111  mols.push_back(std::string());
112  MolPickler::pickleMol(m, mols.back());
113  return size() - 1;
114  }
115 
116  //! Adds a pickled binary molecule, no validity checking of the input
117  //! is done.
118  unsigned int addBinary(const std::string &pickle) {
119  mols.push_back(pickle);
120  return size() - 1;
121  }
122 
123  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
124  if (idx >= mols.size()) throw IndexErrorException(idx);
125  boost::shared_ptr<ROMol> mol(new ROMol);
126  MolPickler::molFromPickle(mols[idx], mol.get());
127  return mol;
128  }
129 
130  virtual unsigned int size() const {
131  return rdcast<unsigned int>(mols.size());
132  }
133 
134  std::vector<std::string> &getMols() { return mols; }
135  const std::vector<std::string> &getMols() const { return mols; }
136 };
137 
138 //! Concrete class that holds smiles strings in memory
139 /*!
140  This implementation uses quite a bit less memory than the
141  cached binary or uncached implementation. However, due to the
142  reduced speed it should be used in conjunction with a pattern
143  fingerprinter.
144 
145  See RDKit::FPHolder
146 */
148  : public MolHolderBase {
149  std::vector<std::string> mols;
150 
151  public:
153 
154  virtual unsigned int addMol(const ROMol &m) {
155  bool doIsomericSmiles = true;
156  mols.push_back(MolToSmiles(m, doIsomericSmiles));
157  return size() - 1;
158  }
159 
160  //! Add a smiles to the dataset, no validation is done
161  //! to the inputs.
162  unsigned int addSmiles(const std::string &smiles) {
163  mols.push_back(smiles);
164  return size() - 1;
165  }
166 
167  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
168  if (idx >= mols.size()) throw IndexErrorException(idx);
169 
170  boost::shared_ptr<ROMol> mol(SmilesToMol(mols[idx]));
171  return mol;
172  }
173 
174  virtual unsigned int size() const {
175  return rdcast<unsigned int>(mols.size());
176  }
177 
178  std::vector<std::string> &getMols() { return mols; }
179  const std::vector<std::string> &getMols() const { return mols; }
180 };
181 
182 //! Concrete class that holds trusted smiles strings in memory
183 /*!
184  A trusted smiles is essentially a smiles string that
185  RDKit has generated. This indicates that fewer
186  sanitization steps are required. See
187  http://rdkit.blogspot.com/2016/09/avoiding-unnecessary-work-and.html
188 
189  This implementation uses quite a bit less memory than the
190  cached binary or uncached implementation. However, due to the
191  reduced speed it should be used in conjunction with a pattern
192  fingerprinter.
193 
194  See RDKit::FPHolder
195 */
197  : public MolHolderBase {
198  std::vector<std::string> mols;
199 
200  public:
202 
203  virtual unsigned int addMol(const ROMol &m) {
204  bool doIsomericSmiles = true;
205  mols.push_back(MolToSmiles(m, doIsomericSmiles));
206  return size() - 1;
207  }
208 
209  //! Add a smiles to the dataset, no validation is done
210  //! to the inputs.
211  unsigned int addSmiles(const std::string &smiles) {
212  mols.push_back(smiles);
213  return size() - 1;
214  }
215 
216  virtual boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
217  if (idx >= mols.size()) throw IndexErrorException(idx);
218 
219  RWMol *m = SmilesToMol(mols[idx], 0, false);
220  m->updatePropertyCache();
221  return boost::shared_ptr<ROMol>(m);
222  }
223 
224  virtual unsigned int size() const {
225  return rdcast<unsigned int>(mols.size());
226  }
227 
228  std::vector<std::string> &getMols() { return mols; }
229  const std::vector<std::string> &getMols() const { return mols; }
230 };
231 
232 //! Base API for the fingerprinter used to rule out impossible matches
234  std::vector<ExplicitBitVect *> fps;
235 
236  public:
237  virtual ~FPHolderBase() {
238  for (size_t i = 0; i < fps.size(); ++i) delete fps[i];
239  }
240 
241  //! Adds a molecule to the fingerprinter
242  unsigned int addMol(const ROMol &m) {
243  fps.push_back(makeFingerprint(m));
244  return rdcast<unsigned int>(fps.size() - 1);
245  }
246 
247  //! Adds a raw bit vector to the fingerprinter
248  unsigned int addFingerprint(const ExplicitBitVect &v) {
249  fps.push_back(new ExplicitBitVect(v));
250  return rdcast<unsigned int>(fps.size() - 1);
251  }
252 
253  //! Return false if a substructure search can never match the molecule
254  bool passesFilter(unsigned int idx, const ExplicitBitVect &query) const {
255  if (idx >= fps.size()) throw IndexErrorException(idx);
256 
257  return AllProbeBitsMatch(query, *fps[idx]);
258  }
259 
260  //! Get the bit vector at the specified index (throws IndexError if out of
261  //! range)
262  const ExplicitBitVect &getFingerprint(unsigned int idx) const {
263  if (idx >= fps.size()) throw IndexErrorException(idx);
264  return *fps[idx];
265  }
266 
267  //! make the query vector
268  //! Caller owns the vector!
269  virtual ExplicitBitVect *makeFingerprint(const ROMol &m) const = 0;
270 
271  std::vector<ExplicitBitVect *> &getFingerprints() { return fps; }
272  const std::vector<ExplicitBitVect *> &getFingerprints() const { return fps; }
273 };
274 
275 //! Uses the pattern fingerprinter to rule out matches
277  public:
278  //! Caller owns the vector!
279  virtual ExplicitBitVect *makeFingerprint(const ROMol &m) const {
280  return PatternFingerprintMol(m, 2048);
281  }
282 };
283 
284 //! Substructure Search a library of molecules
285 /*! This class allows for multithreaded substructure searches of
286  large datasets.
287 
288  The implementations can use fingerprints to speed up searches
289  and have molecules cached as binary forms to reduce memory
290  usage.
291 
292  basic usage:
293  \code
294  SubstructLibrary lib;
295  lib.addMol(mol);
296  std::vector<unsigned int> results = lib.getMatches(query);
297  for(std::vector<unsigned int>::const_iterator matchIndex=results.begin();
298  matchIndex != results.end();
299  ++matchIndex) {
300  boost::shared_ptr<ROMol> match = lib.getMol(*matchIndex);
301  }
302  \endcode
303 
304  Using different mol holders and pattern fingerprints.
305 
306  \code
307  boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder = \
308  boost::make_shared<CachedTrustedSmilesMolHolder>();
309  boost::shared_ptr<PatternHolder> patternHolder = \
310  boost::make_shared<PatternHolder>();
311 
312  SubstructLibrary lib(molHolder, patternHolder);
313  lib.addMol(mol);
314  \endcode
315 
316  Cached molecule holders create molecules on demand. There are currently
317  three styles of cached molecules.
318 
319  CachedMolHolder: stores molecules in the rdkit binary format.
320  CachedSmilesMolHolder: stores molecules in smiles format.
321  CachedTrustedSmilesMolHolder: stores molecules in smiles format.
322 
323  The CachedTrustedSmilesMolHolder is made to add molecules from
324  a trusted source. This makes the basic assumption that RDKit was
325  used to sanitize and canonicalize the smiles string. In practice
326  this is considerably faster than using arbitrary smiles strings since
327  certain assumptions can be made.
328 
329  When loading from external data, as opposed to using the "addMol" API,
330  care must be taken to ensure that the pattern fingerprints and smiles
331  are synchronized.
332 
333  Each pattern holder has an API point for making its fingerprint. This
334  is useful to ensure that the pattern stored in the database will be
335  compatible with the patterns made when analyzing queries.
336 
337  \code
338  boost::shared_ptr<CachedTrustedSmilesMolHolder> molHolder = \
339  boost::make_shared<CachedTrustedSmilesMolHolder>();
340  boost::shared_ptr<PatternHolder> patternHolder = \
341  boost::make_shared<PatternHolder>();
342 
343  // the PatternHolder instance is able to make fingerprints.
344  // These, of course, can be read from a file. For demonstration
345  // purposes we construct them here.
346  const std::string trustedSmiles = "c1ccccc1";
347  ROMol *m = SmilesToMol(trustedSmiles);
348  const ExplicitBitVect *bitVector = patternHolder->makeFingerprint(*m);
349 
350  // The trusted smiles and bitVector can be read from any source.
351  // This is the fastest way to load a substruct library.
352  molHolder->addSmiles( trustedSmiles );
353  patternHolder->addFingerprint( *bitVector );
354  SubstructLibrary lib(molHolder, patternHolder);
355  delete m;
356  delete bitVector;
357  \endcode
358 
359 */
361  boost::shared_ptr<MolHolderBase> molholder;
362  boost::shared_ptr<FPHolderBase> fpholder;
363  MolHolderBase *mols; // used for a small optimization
364  FPHolderBase *fps;
365 
366  public:
368  : molholder(new MolHolder),
369  fpholder(),
370  mols(molholder.get()),
371  fps(nullptr) {}
372 
373  SubstructLibrary(boost::shared_ptr<MolHolderBase> molecules)
374  : molholder(molecules), fpholder(), mols(molholder.get()), fps(0) {}
375 
376  SubstructLibrary(boost::shared_ptr<MolHolderBase> molecules,
377  boost::shared_ptr<FPHolderBase> fingerprints)
378  : molholder(molecules),
379  fpholder(fingerprints),
380  mols(molholder.get()),
381  fps(fpholder.get()) {}
382 
383  SubstructLibrary(const std::string &pickle)
384  : molholder(new MolHolder),
385  fpholder(),
386  mols(molholder.get()),
387  fps(nullptr) {
388  initFromString(pickle);
389  }
390 
391  //! Get the underlying molecule holder implementation
392  boost::shared_ptr<MolHolderBase> &getMolHolder() { return molholder; }
393 
394  const boost::shared_ptr<MolHolderBase> &getMolHolder() const {
395  return molholder;
396  }
397 
398  //! Get the underlying fingerprint holder implementation
399  boost::shared_ptr<FPHolderBase> &getFpHolder() { return fpholder; }
400 
401  //! Get the underlying fingerprint holder implementation
402  const boost::shared_ptr<FPHolderBase> &getFpHolder() const {
403  return fpholder;
404  }
405 
406  const MolHolderBase &getMolecules() const {
407  PRECONDITION(mols, "Molecule holder NULL in SubstructLibrary");
408  return *mols;
409  }
410 
411  //! Get the underlying fingerprint implementation.
412  /*! Throws a value error if no fingerprints have been set */
414  if (!fps)
415  throw ValueErrorException("Substruct Library does not have fingerprints");
416  return *fps;
417  }
418 
419  const FPHolderBase &getFingerprints() const {
420  if (!fps)
421  throw ValueErrorException("Substruct Library does not have fingerprints");
422  return *fps;
423  }
424 
425  //! Add a molecule to the library
426  /*!
427  \param mol Molecule to add
428 
429  returns index for the molecule in the library
430  */
431  unsigned int addMol(const ROMol &mol);
432 
433  //! Get the matching indices for the query
434  /*!
435  \param query Query to match against molecules
436  \param recursionPossible flags whether or not recursive matches are allowed
437  [ default true ]
438  \param useChirality use atomic CIP codes as part of the comparison [
439  default true ]
440  \param useQueryQueryMatches if set, the contents of atom and bond queries
441  will be used as part of the matching
442  [ default false ]
443  \param numThreads If -1 use all available processors [default -1]
444  \param maxResults Maximum results to return, -1 means return all [default
445  -1]
446  */
447  std::vector<unsigned int> getMatches(const ROMol &query,
448  bool recursionPossible = true,
449  bool useChirality = true,
450  bool useQueryQueryMatches = false,
451  int numThreads = -1,
452  int maxResults = -1);
453  //! Get the matching indices for the query between the given indices
454  /*!
455  \param query Query to match against molecules
456  \param startIdx Start index of the search
457  \param endIdx Ending idx (non-inclusive) of the search.
458  \param recursionPossible flags whether or not recursive matches are allowed
459  [ default true ]
460  \param useChirality use atomic CIP codes as part of the comparison [
461  default true ]
462  \param useQueryQueryMatches if set, the contents of atom and bond queries
463  will be used as part of the matching
464  [ default false ]
465  \param numThreads If -1 use all available processors [default -1]
466  \param maxResults Maximum results to return, -1 means return all [default
467  -1]
468  */
469  std::vector<unsigned int> getMatches(
470  const ROMol &query, unsigned int startIdx, unsigned int endIdx,
471  bool recursionPossible = true, bool useChirality = true,
472  bool useQueryQueryMatches = false, int numThreads = -1,
473  int maxResults = -1);
474 
475  //! Return the number of matches for the query
476  /*!
477  \param query Query to match against molecules
478  \param recursionPossible flags whether or not recursive matches are allowed
479  [ default true ]
480  \param useChirality use atomic CIP codes as part of the comparison [
481  default true ]
482  \param useQueryQueryMatches if set, the contents of atom and bond queries
483  will be used as part of the matching
484  [ default false ]
485  \param numThreads If -1 use all available processors [default -1]
486  */
487  unsigned int countMatches(const ROMol &query, bool recursionPossible = true,
488  bool useChirality = true,
489  bool useQueryQueryMatches = false,
490  int numThreads = -1);
491  //! Return the number of matches for the query between the given indices
492  /*!
493  \param query Query to match against molecules
494  \param startIdx Start index of the search
495  \param endIdx Ending idx (non-inclusive) of the search.
496  \param recursionPossible flags whether or not recursive matches are allowed
497  [ default true ]
498  \param useChirality use atomic CIP codes as part of the comparison [
499  default true ]
500  \param useQueryQueryMatches if set, the contents of atom and bond queries
501  will be used as part of the matching
502  [ default false ]
503  \param numThreads If -1 use all available processors [default -1]
504  */
505  unsigned int countMatches(const ROMol &query, unsigned int startIdx,
506  unsigned int endIdx, bool recursionPossible = true,
507  bool useChirality = true,
508  bool useQueryQueryMatches = false,
509  int numThreads = -1);
510 
511  //! Returns true if any match exists for the query
512  /*!
513  \param query Query to match against molecules
514  \param recursionPossible flags whether or not recursive matches are allowed
515  [ default true ]
516  \param useChirality use atomic CIP codes as part of the comparison [
517  default true ]
518  \param useQueryQueryMatches if set, the contents of atom and bond queries
519  will be used as part of the matching
520  [ default false ]
521  \param numThreads If -1 use all available processors [default -1]
522  */
523  bool hasMatch(const ROMol &query, bool recursionPossible = true,
524  bool useChirality = true, bool useQueryQueryMatches = false,
525  int numThreads = -1);
526  //! Returns true if any match exists for the query between the specified
527  //! indices
528  /*!
529  \param query Query to match against molecules
530  \param startIdx Start index of the search
531  \param endIdx Ending idx (non-inclusive) of the search.
532  \param recursionPossible flags whether or not recursive matches are allowed
533  [ default true ]
534  \param useChirality use atomic CIP codes as part of the comparison [
535  default true ]
536  \param useQueryQueryMatches if set, the contents of atom and bond queries
537  will be used as part of the matching
538  [ default false ]
539  \param numThreads If -1 use all available processors [default -1]
540  */
541  bool hasMatch(const ROMol &query, unsigned int startIdx, unsigned int endIdx,
542  bool recursionPossible = true, bool useChirality = true,
543  bool useQueryQueryMatches = false, int numThreads = -1);
544 
545  //! Returns the molecule at the given index
546  /*!
547  \param idx Index of the molecule in the library
548  */
549  boost::shared_ptr<ROMol> getMol(unsigned int idx) const {
550  // expects implementation to throw IndexError if out of range
551  PRECONDITION(mols, "molholder is null in SubstructLibrary");
552  return mols->getMol(idx);
553  }
554 
555  //! Returns the molecule at the given index
556  /*!
557  \param idx Index of the molecule in the library
558  */
559  boost::shared_ptr<ROMol> operator[](unsigned int idx) {
560  // expects implementation to throw IndexError if out of range
561  PRECONDITION(mols, "molholder is null in SubstructLibrary");
562  return mols->getMol(idx);
563  }
564 
565  //! return the number of molecules in the library
566  unsigned int size() const {
567  PRECONDITION(mols, "molholder is null in SubstructLibrary");
568  return rdcast<unsigned int>(molholder->size());
569  }
570 
571  //! access required for serialization
572  void resetHolders() {
573  mols = molholder.get();
574  fps = fpholder.get();
575  }
576 
577  //! serializes (pickles) to a stream
578  void toStream(std::ostream &ss) const;
579  //! returns a string with a serialized (pickled) representation
580  std::string Serialize() const;
581  //! initializes from a stream pickle
582  void initFromStream(std::istream &ss);
583  //! initializes from a string pickle
584  void initFromString(const std::string &text);
585 };
586 } // namespace RDKit
587 
589 #endif
RDKit::MolHolder::size
virtual unsigned int size() const
Get the current library size.
Definition: SubstructLibrary.h:88
RDKit::CachedTrustedSmilesMolHolder::CachedTrustedSmilesMolHolder
CachedTrustedSmilesMolHolder()
Definition: SubstructLibrary.h:201
RDKit::CachedMolHolder::getMols
const std::vector< std::string > & getMols() const
Definition: SubstructLibrary.h:135
RDKit::CachedMolHolder::size
virtual unsigned int size() const
Get the current library size.
Definition: SubstructLibrary.h:130
RDKit::FPHolderBase::getFingerprints
const std::vector< ExplicitBitVect * > & getFingerprints() const
Definition: SubstructLibrary.h:272
RDKit::EnumerationStrategyPickler::pickle
RDKIT_CHEMREACTIONS_EXPORT void pickle(const boost::shared_ptr< EnumerationStrategyBase > &enumerator, std::ostream &ss)
pickles a EnumerationStrategy and adds the results to a stream ss
RDKit::CachedSmilesMolHolder::size
virtual unsigned int size() const
Get the current library size.
Definition: SubstructLibrary.h:174
RDKit::CachedTrustedSmilesMolHolder
Concrete class that holds trusted smiles strings in memory.
Definition: SubstructLibrary.h:196
RDKit::SubstructLibraryCanSerialize
RDKIT_SUBSTRUCTLIBRARY_EXPORT bool SubstructLibraryCanSerialize()
BitOps.h
Contains general bit-comparison and similarity operations.
RDKit::MolHolder::MolHolder
MolHolder()
Definition: SubstructLibrary.h:76
RDKit::CachedTrustedSmilesMolHolder::getMols
std::vector< std::string > & getMols()
Definition: SubstructLibrary.h:228
RDKit::CachedTrustedSmilesMolHolder::addSmiles
unsigned int addSmiles(const std::string &smiles)
Definition: SubstructLibrary.h:211
Fingerprints.h
RDKit::MolToSmiles
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, bool doIsomericSmiles=true, bool doKekule=false, int rootedAtAtom=-1, bool canonical=true, bool allBondsExplicit=false, bool allHsExplicit=false, bool doRandom=false)
returns canonical SMILES for a molecule
RDKit::RWMol
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
SmilesWrite.h
RDKit::CachedSmilesMolHolder::addMol
virtual unsigned int addMol(const ROMol &m)
Definition: SubstructLibrary.h:154
RDKit::SubstructLibrary::getFpHolder
boost::shared_ptr< FPHolderBase > & getFpHolder()
Get the underlying molecule holder implementation.
Definition: SubstructLibrary.h:399
SmilesParse.h
RDKit::SubstructLibrary
Substructure Search a library of molecules.
Definition: SubstructLibrary.h:360
RDKit::FPHolderBase
Base FPI for the fingerprinter used to rule out impossible matches.
Definition: SubstructLibrary.h:233
RDKit::SubstructLibrary::operator[]
boost::shared_ptr< ROMol > operator[](unsigned int idx)
Returns the molecule at the given index.
Definition: SubstructLibrary.h:559
RDKit::FPHolderBase::getFingerprint
const ExplicitBitVect & getFingerprint(unsigned int idx) const
Definition: SubstructLibrary.h:262
ExplicitBitVect.h
SubstructLibrarySerialization.h
RDKit::MolHolder::getMol
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Definition: SubstructLibrary.h:83
RDKit::CachedTrustedSmilesMolHolder::getMol
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Definition: SubstructLibrary.h:216
RDKit::SubstructLibrary::getMolHolder
boost::shared_ptr< MolHolderBase > & getMolHolder()
Get the underlying molecule holder implementation.
Definition: SubstructLibrary.h:392
RDKit::MolPickler::molFromPickle
static void molFromPickle(const std::string &pickle, ROMol *mol)
constructs a molecule from a pickle stored in a string
RDKit::PatternHolder::makeFingerprint
virtual ExplicitBitVect * makeFingerprint(const ROMol &m) const
Caller owns the vector!
Definition: SubstructLibrary.h:279
RDKit::MolHolder::getMols
const std::vector< boost::shared_ptr< ROMol > > & getMols() const
Definition: SubstructLibrary.h:93
RDKit::SmilesToMol
RDKIT_SMILESPARSE_EXPORT RWMol * SmilesToMol(const std::string &smi, const SmilesParserParams &params)
RDKit::FPHolderBase::getFingerprints
std::vector< ExplicitBitVect * > & getFingerprints()
Definition: SubstructLibrary.h:271
RDKit::MolHolderBase::getMol
virtual boost::shared_ptr< ROMol > getMol(unsigned int) const =0
RDKit::MolHolderBase
Base class API for holding molecules to substructure search.
Definition: SubstructLibrary.h:52
RDKit::ROMol
Definition: ROMol.h:171
RDKit::SubstructLibrary::getFingerprints
const FPHolderBase & getFingerprints() const
Definition: SubstructLibrary.h:419
RDKit::CachedSmilesMolHolder::getMol
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Definition: SubstructLibrary.h:167
RDKitBase.h
pulls in the core RDKit functionality
RDKit::FPHolderBase::~FPHolderBase
virtual ~FPHolderBase()
Definition: SubstructLibrary.h:237
RDKit::FPHolderBase::addMol
unsigned int addMol(const ROMol &m)
Adds a molecule to the fingerprinter.
Definition: SubstructLibrary.h:242
RDKit::CachedSmilesMolHolder::getMols
const std::vector< std::string > & getMols() const
Definition: SubstructLibrary.h:179
AllProbeBitsMatch
RDKIT_DATASTRUCTS_EXPORT bool AllProbeBitsMatch(const char *probe, const char *ref)
RDKit::CachedTrustedSmilesMolHolder::getMols
const std::vector< std::string > & getMols() const
Definition: SubstructLibrary.h:229
RDKit::CachedSmilesMolHolder
Concrete class that holds smiles strings in memory.
Definition: SubstructLibrary.h:147
RDKit::SubstructLibrary::getMolHolder
const boost::shared_ptr< MolHolderBase > & getMolHolder() const
Definition: SubstructLibrary.h:394
ValueErrorException
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
Definition: Exceptions.h:33
RDKit::SubstructLibrary::size
unsigned int size() const
return the number of molecules in the library
Definition: SubstructLibrary.h:566
RDKit::SubstructLibrary::SubstructLibrary
SubstructLibrary(const std::string &pickle)
Definition: SubstructLibrary.h:383
RDKit::CachedSmilesMolHolder::CachedSmilesMolHolder
CachedSmilesMolHolder()
Definition: SubstructLibrary.h:152
RDKit::MolHolder
Concrete class that holds molecules in memory.
Definition: SubstructLibrary.h:72
RDKit::CachedTrustedSmilesMolHolder::addMol
virtual unsigned int addMol(const ROMol &m)
Definition: SubstructLibrary.h:203
RDKit::ROMol::updatePropertyCache
void updatePropertyCache(bool strict=true)
calculates any of our lazy properties
RDKit::SubstructLibrary::SubstructLibrary
SubstructLibrary()
Definition: SubstructLibrary.h:367
RDKit::SubstructLibrary::resetHolders
void resetHolders()
access required for serialization
Definition: SubstructLibrary.h:572
RDKit::CachedMolHolder::getMol
virtual boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Definition: SubstructLibrary.h:123
RDKit::MolPickler::pickleMol
static void pickleMol(const ROMol *mol, std::ostream &ss)
pickles a molecule and sends the results to stream ss
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::PatternFingerprintMol
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * PatternFingerprintMol(const ROMol &mol, unsigned int fpSize=2048, std::vector< unsigned int > *atomCounts=0, ExplicitBitVect *setOnlyBits=0)
Generates a topological fingerprint for a molecule using a series of pre-defined structural patterns.
IndexErrorException
Class to allow us to throw an IndexError from C++ and have it make it back to Python.
Definition: Exceptions.h:19
MolPickler.h
RDKit::CachedMolHolder::getMols
std::vector< std::string > & getMols()
Definition: SubstructLibrary.h:134
RDKit::PatternHolder
Uses the pattern fingerprinter to rule out matches.
Definition: SubstructLibrary.h:276
RDKit::SubstructLibrary::getFingerprints
FPHolderBase & getFingerprints()
Get the underlying fingerprint implementation.
Definition: SubstructLibrary.h:413
RDKit::CachedMolHolder::addMol
virtual unsigned int addMol(const ROMol &m)
Definition: SubstructLibrary.h:110
RDKit::FPHolderBase::passesFilter
bool passesFilter(unsigned int idx, const ExplicitBitVect &query) const
Return false if a substructure search can never match the molecule.
Definition: SubstructLibrary.h:254
PRECONDITION
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
RDKit::SubstructLibrary::SubstructLibrary
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules, boost::shared_ptr< FPHolderBase > fingerprints)
Definition: SubstructLibrary.h:376
RDKit::SubstructLibrary::getFpHolder
const boost::shared_ptr< FPHolderBase > & getFpHolder() const
Get the underlying molecule holder implementation.
Definition: SubstructLibrary.h:402
RDKit::MolHolder::getMols
std::vector< boost::shared_ptr< ROMol > > & getMols()
Definition: SubstructLibrary.h:92
RDLog::toStream
RDKIT_RDGENERAL_EXPORT std::ostream & toStream(std::ostream &)
RDKit::CachedMolHolder
Concrete class that holds binary cached molecules in memory.
Definition: SubstructLibrary.h:104
RDKit::CachedMolHolder::CachedMolHolder
CachedMolHolder()
Definition: SubstructLibrary.h:108
RDKit::CachedTrustedSmilesMolHolder::size
virtual unsigned int size() const
Get the current library size.
Definition: SubstructLibrary.h:224
RDKit::CachedMolHolder::addBinary
unsigned int addBinary(const std::string &pickle)
Definition: SubstructLibrary.h:118
RDKit::MolHolderBase::~MolHolderBase
virtual ~MolHolderBase()
Definition: SubstructLibrary.h:54
RDKit::SubstructLibrary::getMolecules
const MolHolderBase & getMolecules() const
Definition: SubstructLibrary.h:406
RDKit::SubstructLibrary::getMol
boost::shared_ptr< ROMol > getMol(unsigned int idx) const
Returns the molecule at the given index.
Definition: SubstructLibrary.h:549
RDKit::FPHolderBase::addFingerprint
unsigned int addFingerprint(const ExplicitBitVect &v)
Adds a raw bit vector to the fingerprinter.
Definition: SubstructLibrary.h:248
RDKit::CachedSmilesMolHolder::getMols
std::vector< std::string > & getMols()
Definition: SubstructLibrary.h:178
RDKit::CachedSmilesMolHolder::addSmiles
unsigned int addSmiles(const std::string &smiles)
Definition: SubstructLibrary.h:162
RDKit::MolHolder::addMol
virtual unsigned int addMol(const ROMol &m)
Definition: SubstructLibrary.h:78
RDKIT_SUBSTRUCTLIBRARY_EXPORT
#define RDKIT_SUBSTRUCTLIBRARY_EXPORT
Definition: export.h:671
ExplicitBitVect
a class for bit vectors that are densely occupied
Definition: ExplicitBitVect.h:29
export.h
RDKit::SubstructLibrary::SubstructLibrary
SubstructLibrary(boost::shared_ptr< MolHolderBase > molecules)
Definition: SubstructLibrary.h:373