RDKit
Open-source cheminformatics and machine learning.
SubstructLibrarySerialization.h
Go to the documentation of this file.
1 // Copyright (c) 2019, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 // n.b. must be included at the END of SubstructLibrary.h
32 #ifndef RDK_SUBSTRUCT_LIBRARY_SERIALIZATION
33 #define RDK_SUBSTRUCT_LIBRARY_SERIALIZATION
34 
35 #ifdef RDK_USE_BOOST_SERIALIZATION
37 #include <boost/archive/text_oarchive.hpp>
38 #include <boost/archive/text_iarchive.hpp>
39 #include <boost/serialization/vector.hpp>
40 #include <boost/serialization/shared_ptr.hpp>
42 
43 BOOST_SERIALIZATION_ASSUME_ABSTRACT(RDKit::MolHolderBase)
44 BOOST_SERIALIZATION_ASSUME_ABSTRACT(RDKit::FPHolderBase)
45 
46 namespace boost {
47 namespace serialization {
48 
49 template <class Archive>
50 void serialize(Archive &ar, RDKit::MolHolderBase &,
51  const unsigned int version) {
52  RDUNUSED_PARAM(version);
53  RDUNUSED_PARAM(ar);
54 }
55 
56 template <class Archive>
57 void save(Archive &ar, const RDKit::MolHolder &molholder,
58  const unsigned int version) {
59  RDUNUSED_PARAM(version);
60  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
61 
62  std::int64_t pkl_count = molholder.getMols().size();
63  ar &pkl_count;
64 
65  for (auto &mol : molholder.getMols()) {
66  std::string pkl;
67  RDKit::MolPickler::pickleMol(*mol.get(), pkl);
68  ar << pkl;
69  }
70 }
71 
72 template <class Archive>
73 void load(Archive &ar, RDKit::MolHolder &molholder,
74  const unsigned int version) {
75  RDUNUSED_PARAM(version);
76  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
77 
78  std::vector<boost::shared_ptr<RDKit::ROMol>> &mols = molholder.getMols();
79  mols.clear();
80 
81  std::int64_t pkl_count = -1;
82  ar &pkl_count;
83 
84  for (std::int64_t i = 0; i < pkl_count; ++i) {
85  std::string pkl;
86  ar >> pkl;
87  mols.push_back(boost::make_shared<RDKit::ROMol>(pkl));
88  }
89 }
90 
91 template <class Archive, class MolHolder>
92 void serialize_strings(Archive &ar, MolHolder &molholder,
93  const unsigned int version) {
94  RDUNUSED_PARAM(version);
95  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
96  ar &molholder.getMols();
97 }
98 
99 template <class Archive>
100 void serialize(Archive &ar, RDKit::CachedMolHolder &molholder,
101  const unsigned int version) {
102  serialize_strings(ar, molholder, version);
103 }
104 
105 template <class Archive>
106 void serialize(Archive &ar, RDKit::CachedSmilesMolHolder &molholder,
107  const unsigned int version) {
108  serialize_strings(ar, molholder, version);
109 }
110 
111 template <class Archive>
112 void serialize(Archive &ar, RDKit::CachedTrustedSmilesMolHolder &molholder,
113  const unsigned int version) {
114  serialize_strings(ar, molholder, version);
115 }
116 
117 template <class Archive>
118 void save(Archive &ar, const RDKit::FPHolderBase &fpholder,
119  const unsigned int version) {
120  RDUNUSED_PARAM(version);
121  std::vector<std::string> pickles;
122  for (auto &fp : fpholder.getFingerprints()) {
123  pickles.push_back(fp->toString());
124  }
125  ar &pickles;
126 }
127 
128 template <class Archive>
129 void load(Archive &ar, RDKit::FPHolderBase &fpholder,
130  const unsigned int version) {
131  RDUNUSED_PARAM(version);
132  std::vector<std::string> pickles;
133  std::vector<ExplicitBitVect *> &fps = fpholder.getFingerprints();
134 
135  ar &pickles;
136  for (size_t i = 0; i < fps.size(); ++i) delete fps[i];
137  fps.clear();
138 
139  for (auto &pkl : pickles) {
140  fps.push_back(new ExplicitBitVect(pkl));
141  }
142 }
143 
144 template <class Archive>
145 void serialize(Archive &ar, RDKit::PatternHolder &pattern_holder,
146  const unsigned int version) {
147  RDUNUSED_PARAM(version);
148  ar &boost::serialization::base_object<RDKit::FPHolderBase>(pattern_holder);
149 }
150 
151 template <class Archive>
152 void registerSubstructLibraryTypes(Archive &ar) {
153  ar.register_type(static_cast<RDKit::MolHolder *>(NULL));
154  ar.register_type(static_cast<RDKit::CachedMolHolder *>(NULL));
155  ar.register_type(static_cast<RDKit::CachedSmilesMolHolder *>(NULL));
156  ar.register_type(static_cast<RDKit::CachedTrustedSmilesMolHolder *>(NULL));
157  ar.register_type(static_cast<RDKit::PatternHolder *>(NULL));
158 }
159 
160 template <class Archive>
161 void save(Archive &ar, const RDKit::SubstructLibrary &slib,
162  const unsigned int version) {
163  RDUNUSED_PARAM(version);
164  registerSubstructLibraryTypes(ar);
165  ar &slib.getMolHolder();
166  ar &slib.getFpHolder();
167 }
168 
169 template <class Archive>
170 void load(Archive &ar, RDKit::SubstructLibrary &slib,
171  const unsigned int version) {
172  RDUNUSED_PARAM(version);
173  registerSubstructLibraryTypes(ar);
174  ar &slib.getMolHolder();
175  ar &slib.getFpHolder();
176  slib.resetHolders();
177 }
178 
179 } // end namespace serialization
180 } // end namespace boost
181 
182 BOOST_CLASS_VERSION(RDKit::MolHolder, 1);
183 BOOST_CLASS_VERSION(RDKit::CachedMolHolder, 1);
184 BOOST_CLASS_VERSION(RDKit::CachedSmilesMolHolder, 1);
185 BOOST_CLASS_VERSION(RDKit::CachedTrustedSmilesMolHolder, 1);
186 BOOST_CLASS_VERSION(RDKit::PatternHolder, 1);
187 BOOST_CLASS_VERSION(RDKit::SubstructLibrary, 1);
188 
189 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::MolHolder);
190 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::FPHolderBase);
191 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::SubstructLibrary);
192 
193 #endif
194 #endif
RDKit::CachedTrustedSmilesMolHolder
Concrete class that holds trusted smiles strings in memory.
Definition: SubstructLibrary.h:196
BoostStartInclude.h
RDKit::SubstructLibrary::getFpHolder
boost::shared_ptr< FPHolderBase > & getFpHolder()
Get the underlying fingerprint holder implementation.
Definition: SubstructLibrary.h:399
RDKit::SubstructLibrary
Substructure Search a library of molecules.
Definition: SubstructLibrary.h:360
boost
Definition: RDLog.h:21
RDKit::FPHolderBase
Base class API for the fingerprinter used to rule out impossible matches.
Definition: SubstructLibrary.h:233
RDUNUSED_PARAM
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:196
BoostEndInclude.h
RDKit::SubstructLibrary::getMolHolder
boost::shared_ptr< MolHolderBase > & getMolHolder()
Get the underlying molecule holder implementation.
Definition: SubstructLibrary.h:392
RDKit::FPHolderBase::getFingerprints
std::vector< ExplicitBitVect * > & getFingerprints()
Definition: SubstructLibrary.h:271
RDKit::MolHolderBase
Base class API for holding molecules to substructure search.
Definition: SubstructLibrary.h:52
RDKit::CachedSmilesMolHolder
Concrete class that holds smiles strings in memory.
Definition: SubstructLibrary.h:147
RDKit::MolHolder
Concrete class that holds molecules in memory.
Definition: SubstructLibrary.h:72
RDKit::SubstructLibrary::resetHolders
void resetHolders()
access required for serialization
Definition: SubstructLibrary.h:572
RDKit::MolPickler::pickleMol
static void pickleMol(const ROMol *mol, std::ostream &ss)
pickles a molecule and sends the results to stream ss
RDKit::PatternHolder
Uses the pattern fingerprinter to rule out matches.
Definition: SubstructLibrary.h:276
RDKit::MolHolder::getMols
std::vector< boost::shared_ptr< ROMol > > & getMols()
Definition: SubstructLibrary.h:92
RDKit::CachedMolHolder
Concrete class that holds binary cached molecules in memory.
Definition: SubstructLibrary.h:104
ExplicitBitVect
a class for bit vectors that are densely occupied
Definition: ExplicitBitVect.h:29