RDKit
Open-source cheminformatics and machine learning.
MultiFPBReader.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2016 Greg Landrum
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_MULTIFPBREADER_H_APR2016
12 #define RD_MULTIFPBREADER_H_APR2016
13 /*! \file MultiFPBReader.h
14 
15  \brief contains a class for reading and searching collections of FPB files
16 
17  \b Note that this functionality is experimental and the API may change
18  in future releases.
19 */
20 
21 #include <RDGeneral/Exceptions.h>
23 #include <DataStructs/FPBReader.h>
24 #include <boost/tuple/tuple.hpp>
25 #include <boost/foreach.hpp>
26 
27 namespace RDKit {
28 
29 //! class for reading and searching multiple FPB files
30 /*!
31  basic usage:
32  \code
33  FPBReader r1("foo1.fpb"),r2("foo2.fpb");
34  std::vector<FPBReader *> readers;
35  readers.push_back(&r1);
36  readers.push_back(&r2);
37  MultiFPBReader fpbs(readers);
38  fpbs.init();
39  boost::shared_ptr<ExplicitBitVect> ebv = fpbs.getReader(0)->getFP(95);
40  std::vector<boost::tuple<double,unsigned int, unsigned int> > nbrs =
41  fpbs.getTanimotoNeighbors(*ebv.get(), 0.70);
42  \endcode
43 
44  \b Note: this functionality is experimental and the API may change
45  in future releases.
46 
47  <b>Note on thread safety</b>
48  Operations that involve reading from FPB files are not thread safe.
49  This means that the \c init() method is not thread safe and none of the
50  search operations are thread safe when an \c FPBReader is initialized in
51  \c lazyRead mode.
52 
53 */
55  public:
56  typedef boost::tuple<double, unsigned int, unsigned int> ResultTuple;
58  : df_init(false), df_initOnSearch(false), df_takeOwnership(false){};
59 
60  /*!
61  \param initOnSearch: if this is true, the \c init() method on child readers
62  will not be called until the first search is done. This is useful with large
63  FPB readers.
64  */
65  MultiFPBReader(bool initOnSearch)
66  : df_init(false),
67  df_initOnSearch(initOnSearch),
68  df_takeOwnership(false){};
69  /*!
70  \param readers: the set of FPBReader objects to use.
71  \param takeOwnership: if true, we own the memory for the FPBReaders
72  \param initOnSearch: if this is true, the \c init() method on child readers
73  will not be called until the first search is done. This is useful with large
74  FPB readers.
75  */
76  MultiFPBReader(std::vector<FPBReader *> &readers, bool takeOwnership = false,
77  bool initOnSearch = false);
78 
80  df_init = false;
81  if (df_takeOwnership) {
82  BOOST_FOREACH (FPBReader *rdr, d_readers) { delete rdr; };
83  d_readers.clear();
84  }
85  };
86 
87  //! Read the data from the file and initialize internal data structures
88  /*!
89  This must be called before most of the other methods of this class.
90  It calls the \c init() method on each of the child FPBReaders
91 
92  */
93  void init();
94 
95  //! returns the number of readers
96  unsigned int length() const { return d_readers.size(); };
97  //! returns the number of bits in our fingerprints (all readers are expected
98  //! to have the same length)
99  unsigned int nBits() const;
100 
101  //! returns a particular reader
102  /*!
103 
104  \param which: the reader to return
105 
106  */
107  FPBReader *getReader(unsigned int which);
108 
109  //! adds a new FPBReader to our list
110  /*!
111 
112  This does no error checking on the reader, so be careful.
113 
114  If \c takeOwnership is \c true then we will take ownership of the memory.
115 
116  \param rdr: the reader to add. If we have already been initialized, the
117  reader's \c init() method will be called
118 
119  \returns a count of the current number of readers
120  */
121  unsigned int addReader(FPBReader *rdr) {
122  PRECONDITION(rdr, "no reader provided");
123  d_readers.push_back(rdr);
124  if (df_init) rdr->init();
125  return d_readers.size();
126  };
127 
128  //! returns tanimoto neighbors that are within a similarity threshold
129  /*!
130  The result vector of (similarity,index,reader) tuples is sorted in order
131  of decreasing similarity
132 
133  \param bv the query fingerprint
134  \param threshold the minimum similarity to return
135  \param numThreads Sets the number of threads to use (more than one thread
136  will only be used if the RDKit was built with multithread support). If set
137  to zero, the max supported by the system will be used.
138 
139  */
140  std::vector<ResultTuple> getTanimotoNeighbors(const std::uint8_t *bv,
141  double threshold = 0.7,
142  int numThreads = 1) const;
143  //! \overload
144  std::vector<ResultTuple> getTanimotoNeighbors(
145  boost::shared_array<std::uint8_t> bv, double threshold = 0.7,
146  int numThreads = 1) const {
147  return getTanimotoNeighbors(bv.get(), threshold, numThreads);
148  };
149  //! \overload
150  std::vector<ResultTuple> getTanimotoNeighbors(const ExplicitBitVect &ebv,
151  double threshold = 0.7,
152  int numThreads = 1) const;
153 
154  //! returns Tversky neighbors that are within a similarity threshold
155  /*!
156  The result vector of (similarity,index,reader) tuples is sorted in order
157  of decreasing similarity
158 
159  \param bv the query fingerprint
160  \param ca the Tversky a coefficient
161  \param cb the Tversky b coefficient
162  \param threshold the minimum similarity to return
163  \param numThreads Sets the number of threads to use (more than one thread
164  will only be used if the RDKit was built with multithread support). If set
165  to zero, the max supported by the system will be used.
166 
167  */
168  std::vector<ResultTuple> getTverskyNeighbors(const std::uint8_t *bv,
169  double ca, double cb,
170  double threshold = 0.7,
171  int numThreads = 1) const;
172  //! \overload
173  std::vector<ResultTuple> getTverskyNeighbors(
174  boost::shared_array<std::uint8_t> bv, double ca, double cb,
175  double threshold = 0.7, int numThreads = 1) const {
176  return getTverskyNeighbors(bv.get(), ca, cb, threshold, numThreads);
177  };
178  //! \overload
179  std::vector<ResultTuple> getTverskyNeighbors(const ExplicitBitVect &ebv,
180  double ca, double cb,
181  double threshold = 0.7,
182  int numThreads = 1) const;
183 
184  //! returns indices of all fingerprints that completely contain this one
185  /*! (i.e. where all the bits set in the query are also set in the db
186  molecule)
187  */
188  std::vector<std::pair<unsigned int, unsigned int>> getContainingNeighbors(
189  const std::uint8_t *bv, int numThreads = 1) const;
190  //! \overload
191  std::vector<std::pair<unsigned int, unsigned int>> getContainingNeighbors(
192  boost::shared_array<std::uint8_t> bv, int numThreads = 1) const {
193  return getContainingNeighbors(bv.get(), numThreads);
194  };
195  //! \overload
196  std::vector<std::pair<unsigned int, unsigned int>> getContainingNeighbors(
197  const ExplicitBitVect &ebv, int numThreads = 1) const;
198 
199  private:
200  std::vector<FPBReader *> d_readers;
201  bool df_init, df_initOnSearch, df_takeOwnership;
202 
203  // disable automatic copy constructors and assignment operators
204  // for this class and its subclasses. They will likely be
205  // carrying around stream pointers and copying those is a recipe
206  // for disaster.
208  MultiFPBReader &operator=(const MultiFPBReader &);
209 };
210 } // namespace RDKit
211 #endif
RDKit::MultiFPBReader::addReader
unsigned int addReader(FPBReader *rdr)
adds a new FPBReader to our list
Definition: MultiFPBReader.h:121
RDKit::MultiFPBReader::length
unsigned int length() const
returns the number of readers
Definition: MultiFPBReader.h:96
RDKit::MultiFPBReader::getTanimotoNeighbors
std::vector< ResultTuple > getTanimotoNeighbors(boost::shared_array< std::uint8_t > bv, double threshold=0.7, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: MultiFPBReader.h:144
RDKit::MultiFPBReader
class for reading and searching multiple FPB files
Definition: MultiFPBReader.h:54
RDKit::MultiFPBReader::ResultTuple
boost::tuple< double, unsigned int, unsigned int > ResultTuple
Definition: MultiFPBReader.h:56
RDKit::MultiFPBReader::~MultiFPBReader
~MultiFPBReader()
Definition: MultiFPBReader.h:79
RDKit::FPBReader::init
void init()
Read the data from the file and initialize internal data structures.
ExplicitBitVect.h
RDKIT_DATASTRUCTS_EXPORT
#define RDKIT_DATASTRUCTS_EXPORT
Definition: export.h:112
RDKit::MultiFPBReader::getTverskyNeighbors
std::vector< ResultTuple > getTverskyNeighbors(boost::shared_array< std::uint8_t > bv, double ca, double cb, double threshold=0.7, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: MultiFPBReader.h:173
RDKit::MultiFPBReader::MultiFPBReader
MultiFPBReader()
Definition: MultiFPBReader.h:57
FPBReader.h
contains a simple class for reading and searching FPB files
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::MultiFPBReader::getContainingNeighbors
std::vector< std::pair< unsigned int, unsigned int > > getContainingNeighbors(boost::shared_array< std::uint8_t > bv, int numThreads=1) const
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: MultiFPBReader.h:191
RDKit::MultiFPBReader::MultiFPBReader
MultiFPBReader(bool initOnSearch)
Definition: MultiFPBReader.h:65
PRECONDITION
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
RDKit::FPBReader
class for reading and searching FPB files
Definition: FPBReader.h:58
Exceptions.h
ExplicitBitVect
a class for bit vectors that are densely occupied
Definition: ExplicitBitVect.h:29
export.h