RDKit
Open-source cheminformatics and machine learning.
Catalog.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2003-2006 Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_CATALOG_H
13 #define RD_CATALOG_H
14 
15 // Boost graph stuff
17 #include <boost/graph/graph_traits.hpp>
18 #include <boost/graph/adjacency_list.hpp>
19 #include <boost/version.hpp>
20 #if BOOST_VERSION >= 104000
21 #include <boost/property_map/property_map.hpp>
22 #else
23 #include <boost/property_map.hpp>
24 #endif
26 
27 // for some typedefs
28 #include <RDGeneral/types.h>
29 #include <RDGeneral/StreamOps.h>
30 
31 namespace RDCatalog {
32 const int versionMajor = 1;
33 const int versionMinor = 0;
34 const int versionPatch = 0;
35 const int endianId = 0xDEADBEEF;
36 
37 //-----------------------------------------------------------------------------
38 //! abstract base class for a catalog object
39 template <class entryType, class paramType>
40 class Catalog {
41  public:
42  typedef entryType entryType_t;
43  typedef paramType paramType_t;
44 
45  //------------------------------------
47 
48  //------------------------------------
49  virtual ~Catalog() { delete dp_cParams; }
50 
51  //------------------------------------
52  //! return a serialized form of the Catalog as an std::string
53  virtual std::string Serialize() const = 0;
54 
55  //------------------------------------
56  //! adds an entry to the catalog
57  /*!
58 
59  \param entry the entry to be added
60  \param updateFPLength (optional) if this is true, our internal
61  fingerprint length will also be updated.
62 
63  */
64  virtual unsigned int addEntry(entryType *entry,
65  bool updateFPLength = true) = 0;
66 
67  //------------------------------------
68  //! returns a particular entry in the Catalog
69  virtual const entryType *getEntryWithIdx(unsigned int idx) const = 0;
70 
71  //------------------------------------
72  //! returns the number of entries
73  virtual unsigned int getNumEntries() const = 0;
74 
75  //------------------------------------
76  //! returns the length of our fingerprint
77  unsigned int getFPLength() const { return d_fpLength; }
78 
79  //------------------------------------
80  //! sets our fingerprint length
81  void setFPLength(unsigned int val) { d_fpLength = val; }
82 
83  //------------------------------------
84  //! sets our parameters by copying the \c params argument
85  virtual void setCatalogParams(const paramType *params) {
86  PRECONDITION(params, "bad parameter object");
87  // if we already have a paramter object throw an exception
89  "A parameter object already exists on the catalog");
90  /*
91  if (dp_cParams) {
92  // we already have parameter object on the catalog
93  // can't overwrite it
94  PRECONDITION(0, "A parameter object already exist on the catalog");
95  }*/
96  dp_cParams = new paramType(*params);
97  }
98 
99  //------------------------------------
100  //! returns a pointer to our parameters
101  const paramType *getCatalogParams() const { return dp_cParams; }
102 
103  protected:
104  // this is the ID that will be assigned to the next entry
105  // added to the catalog - need not be same as the number of entries
106  // in the catalog and does not correspond with the
107  // id of the entry in the catalog.
108  // this is more along the lines of bitId
109  unsigned int d_fpLength; //!< the length of our fingerprint
110  paramType *dp_cParams; //!< our params object
111 };
112 
113 //-----------------------------------------------------------------------------
114 //! A Catalog with a hierarchical structure
115 /*!
116 
117  The entries of a HierarchCatalog are arranged in a directed graph
118 
119  <b>The difference between <i>Indices</i> and <i>Bit Ids</i></b>
120 
121  A HierarchCatalog may contain more entries than the user is actually
122  interested in. For example a HierarchCatalog constructed to contain
123  orders 5 through 8 may well contain information about orders 1-5,
124  in order to facilitate some search optimizations.
125 
126  - <i>Bit Ids</i> refer to the "interesting" bits.
127  So, in the above example, Bit Id \c 0 will be the first entry
128  with order 5.
129  - <i>Indices</i> refer to the underlying structure of the catalog.
130  So, in the above example, the entry with index \c 0 will be
131  the first entry with order 1.
132 
133 */
134 template <class entryType, class paramType, class orderType>
135 class HierarchCatalog : public Catalog<entryType, paramType> {
136  // the entries in the catalog can be traversed using the edges
137  // in a desired order
138  public:
139  //! used by the BGL to set up the node properties in our graph
140  struct vertex_entry_t {
141  enum { num = 1003 };
142  typedef boost::vertex_property_tag kind;
143  };
144  typedef boost::property<vertex_entry_t, entryType *> EntryProperty;
145 
146  //! the type of the graph itself:
147  typedef boost::adjacency_list<
148  boost::vecS,
149  boost::vecS, // FIX: should be using setS for edges so that parallel
150  // edges are never added (page 225 BGL book)
151  // but that seems result in compile errors
152  boost::bidirectionalS, EntryProperty>
154 
155  typedef boost::graph_traits<CatalogGraph> CAT_GRAPH_TRAITS;
156  typedef typename CAT_GRAPH_TRAITS::vertex_iterator VER_ITER;
157  typedef std::pair<VER_ITER, VER_ITER> ENT_ITER_PAIR;
158  typedef typename CAT_GRAPH_TRAITS::adjacency_iterator DOWN_ENT_ITER;
159  typedef std::pair<DOWN_ENT_ITER, DOWN_ENT_ITER> DOWN_ENT_ITER_PAIR;
160 
161  //------------------------------------
163 
164  //------------------------------------
165  //! Construct by making a copy of the input \c params object
167  : Catalog<entryType, paramType>() {
168  this->setCatalogParams(params);
169  }
170 
171  //------------------------------------
172  //! Construct from a \c pickle (a serialized form of the HierarchCatalog)
174  this->initFromString(pickle);
175  }
176 
177  //------------------------------------
178  ~HierarchCatalog() { destroy(); }
179 
180  //------------------------------------
181  //! serializes this object to a stream
182  void toStream(std::ostream &ss) const {
183  PRECONDITION(this->getCatalogParams(), "NULL parameter object");
184 
185  // the i/o header:
190 
191  // information about the catalog itself:
192  int tmpUInt;
193  tmpUInt = this->getFPLength();
194  RDKit::streamWrite(ss, tmpUInt);
195  tmpUInt = this->getNumEntries();
196  RDKit::streamWrite(ss, tmpUInt);
197 
198  // std::cout << ">>>>-------------------------------" << std::endl;
199  // std::cout << "\tlength: " << getFPLength() << " " << getNumEntries() <<
200  // std::endl;
201 
202  // add the params object:
203  this->getCatalogParams()->toStream(ss);
204  // std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
205  // std::cout << " " << getCatalogParams()->getUpperFragLength();
206  // std::cout << " " << getCatalogParams()->getNumFuncGroups();
207  // std::cout << std::endl;
208 
209  // write the entries in order:
210  for (unsigned int i = 0; i < getNumEntries(); i++) {
211  this->getEntryWithIdx(i)->toStream(ss);
212  }
213 
214  // finally the adjacency list:
215  for (unsigned int i = 0; i < getNumEntries(); i++) {
216  RDKit::INT_VECT children = this->getDownEntryList(i);
217  tmpUInt = static_cast<unsigned int>(children.size());
218  RDKit::streamWrite(ss, tmpUInt);
219  for (RDKit::INT_VECT::const_iterator ivci = children.begin();
220  ivci != children.end(); ivci++) {
221  RDKit::streamWrite(ss, *ivci);
222  }
223  }
224  }
225 
226  //------------------------------------
227  //! serializes this object and returns the resulting \c pickle
228  std::string Serialize() const {
229  std::stringstream ss(std::ios_base::binary | std::ios_base::out |
230  std::ios_base::in);
231  this->toStream(ss);
232  return ss.str();
233  }
234 
235  //------------------------------------
236  //! fills the contents of this object from a stream containing a \c pickle
237  void initFromStream(std::istream &ss) {
238  int tmpInt;
239  // FIX: at the moment we ignore the header info:
240  RDKit::streamRead(ss, tmpInt);
241  RDKit::streamRead(ss, tmpInt);
242  RDKit::streamRead(ss, tmpInt);
243  RDKit::streamRead(ss, tmpInt);
244 
245  unsigned int tmpUInt;
246  RDKit::streamRead(ss, tmpUInt); // fp length
247  this->setFPLength(tmpUInt);
248 
249  unsigned int numEntries;
250  RDKit::streamRead(ss, numEntries);
251  // std::cout << "<<<-------------------------------" << std::endl;
252  // std::cout << "\tlength: " << getFPLength() << " " << numEntries <<
253  // std::endl;
254 
255  // grab the params:
256  paramType *params = new paramType();
257  params->initFromStream(ss);
258  this->setCatalogParams(params);
259  delete params;
260 
261  // std::cout << "\tparams: " << getCatalogParams()->getLowerFragLength();
262  // std::cout << " " << getCatalogParams()->getUpperFragLength();
263  // std::cout << " " << getCatalogParams()->getNumFuncGroups();
264  // std::cout << std::endl;
265 
266  // now all of the entries:
267  for (unsigned int i = 0; i < numEntries; i++) {
268  entryType *entry = new entryType();
269  entry->initFromStream(ss);
270  this->addEntry(entry, false);
271  }
272 
273  // and, finally, the adjacency list:
274  for (unsigned int i = 0; i < numEntries; i++) {
275  unsigned int nNeighbors;
276  RDKit::streamRead(ss, nNeighbors);
277  for (unsigned int j = 0; j < nNeighbors; j++) {
278  RDKit::streamRead(ss, tmpInt);
279  this->addEdge(i, tmpInt);
280  }
281  }
282  }
283 
284  //------------------------------------
285  unsigned int getNumEntries() const {
286  return static_cast<unsigned int>(boost::num_vertices(d_graph));
287  }
288 
289  //------------------------------------
290  //! fills the contents of this object from a string containing a \c pickle
291  void initFromString(const std::string &text) {
292  std::stringstream ss(std::ios_base::binary | std::ios_base::out |
293  std::ios_base::in);
294  // initialize the stream:
295  ss.write(text.c_str(), text.length());
296  // now start reading out values:
297  this->initFromStream(ss);
298  }
299 
300  //------------------------------------
301  //! add a new entry to the catalog
302  /*!
303 
304  \param entry the entry to be added
305  \param updateFPLength (optional) if this is true, our internal
306  fingerprint length will also be updated.
307 
308  */
309  unsigned int addEntry(entryType *entry, bool updateFPLength = true) {
310  PRECONDITION(entry, "bad arguments");
311  if (updateFPLength) {
312  unsigned int fpl = this->getFPLength();
313  entry->setBitId(fpl);
314  fpl++;
315  this->setFPLength(fpl);
316  }
317  unsigned int eid = static_cast<unsigned int>(
318  boost::add_vertex(EntryProperty(entry), d_graph));
319  orderType etype = entry->getOrder();
320  // REVIEW: this initialization is not required: the STL map, in
321  // theory, will create a new object when operator[] is called
322  // for a new item
323  if (d_orderMap.find(etype) == d_orderMap.end()) {
324  RDKit::INT_VECT nets;
325  d_orderMap[etype] = nets;
326  }
327  d_orderMap[etype].push_back(eid);
328  return eid;
329  }
330 
331  //------------------------------------
332  //! adds an edge between two entries in the catalog
333  /*!
334  Since we are using a bidirectional graph - the order in
335  which the ids are supplied here makes a difference
336 
337  \param id1 index of the edge's beginning
338  \param id2 index of the edge's end
339 
340  */
341  void addEdge(unsigned int id1, unsigned int id2) {
342  unsigned int nents = getNumEntries();
343  URANGE_CHECK(id1, nents);
344  URANGE_CHECK(id2, nents);
345  // FIX: if we boost::setS for the edgeList BGL will
346  // do the checking for duplicity (parallel edges)
347  // But for reasons unknown setS results in compile
348  // errors while using adjacent_vertices.
349  typename CAT_GRAPH_TRAITS::edge_descriptor edge;
350  bool found;
351  boost::tie(edge, found) = boost::edge(boost::vertex(id1, d_graph),
352  boost::vertex(id2, d_graph), d_graph);
353  if (!found) {
354  boost::add_edge(id1, id2, d_graph);
355  }
356  }
357 
358  //------------------------------------
359  //! returns a pointer to our entry with a particular index
360  const entryType *getEntryWithIdx(unsigned int idx) const {
361  URANGE_CHECK(idx, getNumEntries());
362  int vd = static_cast<int>(boost::vertex(idx, d_graph));
363  typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
364  pMap = boost::get(vertex_entry_t(), d_graph);
365  return pMap[vd];
366  }
367 
368  //------------------------------------
369  //! returns a pointer to our entry with a particular bit ID
370  const entryType *getEntryWithBitId(unsigned int idx) const {
371  URANGE_CHECK(idx, this->getFPLength());
372  typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
373  pMap = boost::get(vertex_entry_t(), d_graph);
374  const entryType *res = NULL;
375  for (unsigned int i = idx; i < this->getNumEntries(); i++) {
376  const entryType *e = pMap[i];
377  if (e->getBitId() == static_cast<int>(idx)) {
378  res = e;
379  break;
380  }
381  }
382  return res;
383  }
384 
385  //------------------------------------
386  //! returns the index of the entry with a particular bit ID
387  int getIdOfEntryWithBitId(unsigned int idx) const {
388  URANGE_CHECK(idx, this->getFPLength());
389  typename boost::property_map<CatalogGraph, vertex_entry_t>::const_type
390  pMap = boost::get(vertex_entry_t(), d_graph);
391  int res = -1;
392  for (unsigned int i = idx; i < this->getNumEntries(); i++) {
393  const entryType *e = pMap[i];
394  if (static_cast<unsigned int>(e->getBitId()) == idx) {
395  res = i;
396  break;
397  }
398  }
399  return res;
400  }
401 
402  //------------------------------------
403  //! returns a list of the indices of entries below the one passed in
404  RDKit::INT_VECT getDownEntryList(unsigned int idx) const {
405  RDKit::INT_VECT res;
406  DOWN_ENT_ITER nbrIdx, endIdx;
407  boost::tie(nbrIdx, endIdx) = boost::adjacent_vertices(idx, d_graph);
408  while (nbrIdx != endIdx) {
409  res.push_back(static_cast<int>(*nbrIdx));
410  nbrIdx++;
411  }
412  // std::cout << res.size() << "\n";
413  return res;
414  }
415 
416  //------------------------------------
417  //! returns a list of the indices that have a particular order
418  const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) {
419  return d_orderMap[ord];
420  }
421 
422  //------------------------------------
423  //! returns a list of the indices that have a particular order
424  /*!
425  \overload
426  */
427  const RDKit::INT_VECT &getEntriesOfOrder(orderType ord) const {
428  typename std::map<orderType, RDKit::INT_VECT>::const_iterator elem;
429  elem = d_orderMap.find(ord);
431  elem != d_orderMap.end(),
432  " catalog does not contain any entries of the order specified");
433  return elem->second;
434  }
435 
436  private:
437  // graphs that store the entries in the catalog in a hierachical manner
438  CatalogGraph d_graph;
439  // a map that maps the order type of entries in the catalog to
440  // a vector of vertex indices in the graphs above
441  // e.g. for a catalog with molecular fragments, the order of a fragment can
442  // simply be the number of bond in it. The list this oder maps to is all the
443  // vertex ids of these fragment in the catalog that have this many bonds in
444  // them
445  std::map<orderType, RDKit::INT_VECT> d_orderMap;
446 
447  //------------------------------------
448  //! clear any memory that we've used
449  void destroy() {
450  ENT_ITER_PAIR entItP = boost::vertices(d_graph);
451  typename boost::property_map<CatalogGraph, vertex_entry_t>::type pMap =
452  boost::get(vertex_entry_t(), d_graph);
453  while (entItP.first != entItP.second) {
454  delete pMap[*(entItP.first++)];
455  }
456  }
457 };
458 
459 } // namespace RDCatalog
460 
461 #endif
RDCatalog::Catalog::dp_cParams
paramType * dp_cParams
our params object
Definition: Catalog.h:110
RDCatalog::HierarchCatalog::EntryProperty
boost::property< vertex_entry_t, entryType * > EntryProperty
Definition: Catalog.h:144
RDCatalog::HierarchCatalog::getNumEntries
unsigned int getNumEntries() const
returns the number of entries
Definition: Catalog.h:285
RDKit::EnumerationStrategyPickler::pickle
RDKIT_CHEMREACTIONS_EXPORT void pickle(const boost::shared_ptr< EnumerationStrategyBase > &enumerator, std::ostream &ss)
pickles a EnumerationStrategy and adds the results to a stream ss
RDCatalog::HierarchCatalog::CAT_GRAPH_TRAITS
boost::graph_traits< CatalogGraph > CAT_GRAPH_TRAITS
Definition: Catalog.h:155
RDCatalog::Catalog::~Catalog
virtual ~Catalog()
Definition: Catalog.h:49
RDKit::INT_VECT
std::vector< int > INT_VECT
Definition: types.h:254
RDCatalog::HierarchCatalog::vertex_entry_t
used by the BGL to set up the node properties in our graph
Definition: Catalog.h:140
RDCatalog::HierarchCatalog::initFromString
void initFromString(const std::string &text)
fills the contents of this object from a string containing a pickle
Definition: Catalog.h:291
types.h
BoostStartInclude.h
RDCatalog::HierarchCatalog::getEntryWithIdx
const entryType * getEntryWithIdx(unsigned int idx) const
returns a pointer to our entry with a particular index
Definition: Catalog.h:360
RDCatalog::Catalog::getEntryWithIdx
virtual const entryType * getEntryWithIdx(unsigned int idx) const =0
returns a particular entry in the Catalog
RDCatalog::Catalog::Serialize
virtual std::string Serialize() const =0
return a serialized form of the Catalog as an std::string
RDCatalog::Catalog::setFPLength
void setFPLength(unsigned int val)
sets our fingerprint length
Definition: Catalog.h:81
RDCatalog::HierarchCatalog::getEntriesOfOrder
const RDKit::INT_VECT & getEntriesOfOrder(orderType ord) const
returns a list of the indices that have a particular order
Definition: Catalog.h:427
RDCatalog::versionPatch
const int versionPatch
Definition: Catalog.h:34
CHECK_INVARIANT
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:101
RDKit::streamRead
void streamRead(std::istream &ss, T &loc)
does a binary read of an object from a stream
Definition: StreamOps.h:246
RDCatalog::HierarchCatalog::getIdOfEntryWithBitId
int getIdOfEntryWithBitId(unsigned int idx) const
returns the index of the entry with a particular bit ID
Definition: Catalog.h:387
RDCatalog::Catalog::getNumEntries
virtual unsigned int getNumEntries() const =0
returns the number of entries
BoostEndInclude.h
RDCatalog::Catalog::paramType_t
paramType paramType_t
Definition: Catalog.h:43
RDCatalog::HierarchCatalog::getEntriesOfOrder
const RDKit::INT_VECT & getEntriesOfOrder(orderType ord)
returns a list of the indices that have a particular order
Definition: Catalog.h:418
RDCatalog::HierarchCatalog::addEntry
unsigned int addEntry(entryType *entry, bool updateFPLength=true)
add a new entry to the catalog
Definition: Catalog.h:309
RDCatalog::HierarchCatalog
A Catalog with a hierarchical structure.
Definition: Catalog.h:135
RDCatalog::HierarchCatalog::addEdge
void addEdge(unsigned int id1, unsigned int id2)
adds an edge between two entries in the catalog
Definition: Catalog.h:341
RDKit::streamWrite
void streamWrite(std::ostream &ss, const T &val)
does a binary write of an object to a stream
Definition: StreamOps.h:226
RDCatalog::HierarchCatalog::initFromStream
void initFromStream(std::istream &ss)
fills the contents of this object from a stream containing a pickle
Definition: Catalog.h:237
RDCatalog
Definition: Catalog.h:31
RDCatalog::Catalog
abstract base class for a catalog object
Definition: Catalog.h:40
RDCatalog::HierarchCatalog::VER_ITER
CAT_GRAPH_TRAITS::vertex_iterator VER_ITER
Definition: Catalog.h:156
RDCatalog::HierarchCatalog::ENT_ITER_PAIR
std::pair< VER_ITER, VER_ITER > ENT_ITER_PAIR
Definition: Catalog.h:157
RDCatalog::versionMajor
const int versionMajor
Definition: Catalog.h:32
RDCatalog::HierarchCatalog::CatalogGraph
boost::adjacency_list< boost::vecS, boost::vecS, boost::bidirectionalS, EntryProperty > CatalogGraph
the type of the graph itself:
Definition: Catalog.h:153
RDCatalog::Catalog::d_fpLength
unsigned int d_fpLength
the length of our fingerprint
Definition: Catalog.h:109
RDCatalog::HierarchCatalog::vertex_entry_t::num
@ num
Definition: Catalog.h:141
RDCatalog::HierarchCatalog::getDownEntryList
RDKit::INT_VECT getDownEntryList(unsigned int idx) const
returns a list of the indices of entries below the one passed in
Definition: Catalog.h:404
RDCatalog::versionMinor
const int versionMinor
Definition: Catalog.h:33
RDCatalog::Catalog::getCatalogParams
const paramType * getCatalogParams() const
returns a pointer to our parameters
Definition: Catalog.h:101
RDCatalog::HierarchCatalog::getEntryWithBitId
const entryType * getEntryWithBitId(unsigned int idx) const
returns a pointer to our entry with a particular bit ID
Definition: Catalog.h:370
RDCatalog::HierarchCatalog::Serialize
std::string Serialize() const
serializes this object and returns the resulting pickle
Definition: Catalog.h:228
RDCatalog::HierarchCatalog::DOWN_ENT_ITER
CAT_GRAPH_TRAITS::adjacency_iterator DOWN_ENT_ITER
Definition: Catalog.h:158
StreamOps.h
PRECONDITION
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
URANGE_CHECK
#define URANGE_CHECK(x, hi)
Definition: Invariant.h:142
RDCatalog::Catalog::addEntry
virtual unsigned int addEntry(entryType *entry, bool updateFPLength=true)=0
adds an entry to the catalog
RDCatalog::Catalog::entryType_t
entryType entryType_t
Definition: Catalog.h:42
RDCatalog::Catalog::Catalog
Catalog()
Definition: Catalog.h:46
RDCatalog::HierarchCatalog::DOWN_ENT_ITER_PAIR
std::pair< DOWN_ENT_ITER, DOWN_ENT_ITER > DOWN_ENT_ITER_PAIR
Definition: Catalog.h:159
RDCatalog::HierarchCatalog::~HierarchCatalog
~HierarchCatalog()
Definition: Catalog.h:178
RDCatalog::HierarchCatalog::toStream
void toStream(std::ostream &ss) const
serializes this object to a stream
Definition: Catalog.h:182
RDCatalog::Catalog::setCatalogParams
virtual void setCatalogParams(const paramType *params)
sets our parameters by copying the params argument
Definition: Catalog.h:85
RDCatalog::HierarchCatalog::vertex_entry_t::kind
boost::vertex_property_tag kind
Definition: Catalog.h:142
RDCatalog::endianId
const int endianId
Definition: Catalog.h:35
RDCatalog::Catalog::getFPLength
unsigned int getFPLength() const
returns the length of our fingerprint
Definition: Catalog.h:77
export.h