RDKit
Open-source cheminformatics and machine learning.
EnumerationStrategyBase.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2015, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written
18 // permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 #include <RDGeneral/export.h>
33 #ifndef ENUMERATION_STRATEGY_H
34 #define ENUMERATION_STRATEGY_H
35 
36 #include "EnumerateTypes.h"
37 #include "../Reaction.h"
38 #include <vector>
40 #include <cstdint>
41 #ifdef RDK_USE_BOOST_SERIALIZATION
42 #include <boost/serialization/assume_abstract.hpp>
43 #include <boost/serialization/vector.hpp>
44 // the next two includes need to be there for boost 1.56
45 #include <boost/serialization/singleton.hpp>
46 #include <boost/serialization/extended_type_info.hpp>
47 #include <boost/serialization/shared_ptr.hpp>
48 #endif
50 
51 #include <GraphMol/RDKitBase.h>
52 
53 namespace RDKit {
54 
55 //! class for flagging enumeration strategy errors
57  : public std::exception {
58  public:
59  EnumerationStrategyException(const char *msg) : _msg(msg){};
60  EnumerationStrategyException(const std::string &msg) : _msg(msg){};
61  const char *message() const { return _msg.c_str(); };
63 
64  private:
65  std::string _msg;
66 };
67 
68 //! Return the number of elements per input vector
69 /*! \param bbs vector<vector<T> >
70 
71  \result vector<uint64_t> number of elements in each vector
72  */
73 template <class T>
75  const std::vector<std::vector<T>> &bbs) {
77  for (size_t i = 0; i < bbs.size(); ++i) sizes.push_back(bbs[i].size());
78  return sizes;
79 }
80 
81 //! getSizesFromReactants
82 //! Helper function for enumeration, bbs are stored in a
83 //! std::vector< std::vector<boost::shared_ptr<ROMol> > >
84 //
86  const std::vector<MOL_SPTR_VECT> &bbs);
87 
88 //! getReactantsFromRGroups
89 //! Helper function for enumeration, bbs are stored in a
90 //! std::vector< std::vector<boost::shared_ptr<ROMol> > >
91 //
93 getReactantsFromRGroups(const std::vector<MOL_SPTR_VECT> &bbs,
94  const EnumerationTypes::RGROUPS &rgroups);
95 
96 //! computeNumProducts
97 //! Returns the number of possible product combinations from
98 //! the given numbers of building blocks for each rgroup,
99 //! or EnumerationStrategyBase::EnumerationOverflow if the
100 //! number will not fit into the machine's integer type.
101 //! n.b. An overflow simply means there are a lot of products,
102 //! not that they cannot be enumerated.
104  const EnumerationTypes::RGROUPS &sizes);
105 
106 //! Base Class for enumeration strategies
107 //! Usage:
108 //! EnumerationStrategyBase must be initialized with both a reaction
109 //! and the building block (molecule) vector to be sampled.
110 //!
111 //! \verbatim
112 //! EnumerationStrategyBase &eb = ...
113 //! if(eb) { // can we get another entry
114 //! const EnumerationTypes::RGROUPS &v = eb.next();
115 //! v[0] // RGroup 0 position
116 //! v[1] // RGroup 1 position...
117 //! }
118 //! \endverbatim
119 
121  protected:
122  EnumerationTypes::RGROUPS m_permutation; // where are we currently?
124  m_permutationSizes; // m_permutationSizes num bbs per group
125  boost::uint64_t
126  m_numPermutations; // total number of permutations for this group
127  // EnumerationOverflow (uint64_t(-1)) if it does not fit
128  public:
129  static const boost::uint64_t EnumerationOverflow =
130  static_cast<boost::uint64_t>(-1);
132  : m_permutation(), m_permutationSizes(), m_numPermutations() {}
133 
135 
136  virtual const char *type() const { return "EnumerationStrategyBase"; }
137 
138  //! Initialize the enumerator based on the reaction and the
139  //! supplied building blocks
140  //! This is the standard API point.
141  void initialize(const ChemicalReaction &reaction,
142  const EnumerationTypes::BBS &building_blocks) {
143  // default initialization, may be overridden (sets the # reactants
144  // and computes the default # of permutations)
145  m_permutationSizes = getSizesFromBBs(building_blocks);
146  m_permutation.resize(m_permutationSizes.size());
147 
148  m_numPermutations = computeNumProducts(m_permutationSizes);
149  std::fill(m_permutation.begin(), m_permutation.end(), 0);
150 
151  initializeStrategy(reaction, building_blocks);
152  }
153 
154  //! Initialize the derived class.
155  //! Must be implemented; EnumerationStrategyBase structures are already initialized
156  virtual void initializeStrategy(
157  const ChemicalReaction &reaction,
158  const EnumerationTypes::BBS &building_blocks) = 0;
159 
160  //! returns true if there are more permutations left
161  //! random enumerators may always return true...
162  virtual operator bool() const = 0;
163 
164  //! The current permutation {r1, r2, ...}
165  virtual const EnumerationTypes::RGROUPS &next() = 0;
166 
167  //! copy the enumeration strategy complete with current state
168  virtual EnumerationStrategyBase *copy() const = 0;
169 
170  //! The current position in the enumeration
171  const EnumerationTypes::RGROUPS &getPosition() const { return m_permutation; }
172 
173  //! a result of EnumerationOverflow indicates that the number of
174  //! permutations is too large to be represented in a
175  //! boost::uint64_t.
176  boost::uint64_t getNumPermutations() const { return m_numPermutations; }
177 
178  //! Returns how many permutations have been processed by this strategy
179  virtual boost::uint64_t getPermutationIdx() const = 0;
180 
181  //! Skip the specified number of permutations (useful for
182  //! resetting state to a known position)
183  bool skip(boost::uint64_t skipCount) {
184  for (boost::uint64_t i = 0; i < skipCount; ++i) next();
185  return true;
186  }
187 
188  protected:
189  //! Initialize the internal data structures
190  //! e.g. RGROUPS = {10,40,50};
192  m_permutation.resize(rgroups.size());
193  m_permutationSizes = rgroups;
194  m_numPermutations = computeNumProducts(m_permutationSizes);
195  std::fill(m_permutation.begin(), m_permutation.end(), 0);
196  }
197 
198  private:
199  friend class boost::serialization::access;
200  template <class Archive>
201  void serialize(Archive &ar, const unsigned int /*version*/) {
202  ar &m_permutation;
203  ar &m_permutationSizes;
204  ar &m_numPermutations;
205  }
206 };
207 #ifdef RDK_USE_BOOST_SERIALIZATION
208 BOOST_SERIALIZATION_ASSUME_ABSTRACT(EnumerationStrategyBase)
209 #endif
210 } // namespace RDKit
211 
212 #ifdef RDK_USE_BOOST_SERIALIZATION
213 BOOST_CLASS_VERSION(RDKit::EnumerationStrategyBase, 1)
214 #endif
215 
216 #endif
RDKit::EnumerationStrategyException
class for flagging enumeration strategy errors
Definition: EnumerationStrategyBase.h:56
RDKit::EnumerationStrategyBase::m_numPermutations
boost::uint64_t m_numPermutations
Definition: EnumerationStrategyBase.h:126
BoostStartInclude.h
RDKit::EnumerationStrategyBase::getPosition
const EnumerationTypes::RGROUPS & getPosition() const
The current position in the enumeration.
Definition: EnumerationStrategyBase.h:171
RDKit::EnumerationStrategyBase::getNumPermutations
boost::uint64_t getNumPermutations() const
Definition: EnumerationStrategyBase.h:176
RDKit::EnumerationStrategyException::~EnumerationStrategyException
~EnumerationStrategyException()
Definition: EnumerationStrategyBase.h:62
RDKit::getReactantsFromRGroups
RDKIT_CHEMREACTIONS_EXPORT MOL_SPTR_VECT getReactantsFromRGroups(const std::vector< MOL_SPTR_VECT > &bbs, const EnumerationTypes::RGROUPS &rgroups)
RDKit::EnumerationStrategyBase::internalInitialize
void internalInitialize(const EnumerationTypes::RGROUPS &rgroups)
Definition: EnumerationStrategyBase.h:191
RDKit::EnumerationStrategyException::EnumerationStrategyException
EnumerationStrategyException(const char *msg)
Definition: EnumerationStrategyBase.h:59
BoostEndInclude.h
RDKit::EnumerationStrategyBase::skip
bool skip(boost::uint64_t skipCount)
Definition: EnumerationStrategyBase.h:183
RDKit::EnumerationStrategyBase::m_permutationSizes
EnumerationTypes::RGROUPS m_permutationSizes
Definition: EnumerationStrategyBase.h:124
RDKitBase.h
pulls in the core RDKit functionality
RDKit::MOL_SPTR_VECT
std::vector< boost::shared_ptr< ROMol > > MOL_SPTR_VECT
Definition: FragCatParams.h:20
RDKit::EnumerationTypes::BBS
std::vector< MOL_SPTR_VECT > BBS
Definition: EnumerateTypes.h:42
RDKit::EnumerationStrategyBase::EnumerationStrategyBase
EnumerationStrategyBase()
Definition: EnumerationStrategyBase.h:131
RDKit::ChemicalReaction
This is a class for storing and applying general chemical reactions.
Definition: Reaction.h:119
RDKit::EnumerationStrategyException::EnumerationStrategyException
EnumerationStrategyException(const std::string &msg)
Definition: EnumerationStrategyBase.h:60
RDKit::EnumerationStrategyBase::type
virtual const char * type() const
Definition: EnumerationStrategyBase.h:136
RDKit::EnumerationStrategyBase
Definition: EnumerationStrategyBase.h:120
RDKit::getSizesFromReactants
RDKIT_CHEMREACTIONS_EXPORT EnumerationTypes::RGROUPS getSizesFromReactants(const std::vector< MOL_SPTR_VECT > &bbs)
RDKit::EnumerationTypes::RGROUPS
std::vector< boost::uint64_t > RGROUPS
Definition: EnumerateTypes.h:56
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::computeNumProducts
RDKIT_CHEMREACTIONS_EXPORT boost::uint64_t computeNumProducts(const EnumerationTypes::RGROUPS &sizes)
RDKit::EnumerationStrategyException::message
const char * message() const
Definition: EnumerationStrategyBase.h:61
EnumerateTypes.h
RDKit::EnumerationStrategyBase::~EnumerationStrategyBase
virtual ~EnumerationStrategyBase()
Definition: EnumerationStrategyBase.h:134
RDKIT_CHEMREACTIONS_EXPORT
#define RDKIT_CHEMREACTIONS_EXPORT
Definition: export.h:60
RDKit::getSizesFromBBs
EnumerationTypes::RGROUPS getSizesFromBBs(const std::vector< std::vector< T >> &bbs)
Return the number of elements per input vector.
Definition: EnumerationStrategyBase.h:74
RDKit::EnumerationStrategyBase::m_permutation
EnumerationTypes::RGROUPS m_permutation
Definition: EnumerationStrategyBase.h:122
RDKit::EnumerationStrategyBase::initialize
void initialize(const ChemicalReaction &reaction, const EnumerationTypes::BBS &building_blocks)
Definition: EnumerationStrategyBase.h:141
export.h