RDKit
Open-source cheminformatics and machine learning.
SubstructLibrarySerialization.h
Go to the documentation of this file.
1 // Copyright (c) 2019, Novartis Institutes for BioMedical Research Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following
12 // disclaimer in the documentation and/or other materials provided
13 // with the distribution.
14 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
15 // nor the names of its contributors may be used to endorse or promote
16 // products derived from this software without specific prior written
17 // permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 //
31 // n.b. must be included at the END of SubstructLibrary.h
32 #ifndef RDK_SUBSTRUCT_LIBRARY_SERIALIZATION
33 #define RDK_SUBSTRUCT_LIBRARY_SERIALIZATION
34 
35 #ifdef RDK_USE_BOOST_SERIALIZATION
37 #include <boost/archive/text_oarchive.hpp>
38 #include <boost/archive/text_iarchive.hpp>
39 #include <boost/serialization/vector.hpp>
40 #include <boost/serialization/shared_ptr.hpp>
41 #include <boost/archive/archive_exception.hpp>
43 
// Tell Boost.Serialization to treat the two holder base classes as abstract.
// NOTE(review): presumably required because they are only ever archived
// through pointers to derived holders - confirm against SubstructLibrary.h.
BOOST_SERIALIZATION_ASSUME_ABSTRACT(RDKit::MolHolderBase)
BOOST_SERIALIZATION_ASSUME_ABSTRACT(RDKit::FPHolderBase)
46 
47 namespace boost {
48 namespace serialization {
49 
50 template <class Archive>
51 void serialize(Archive &, RDKit::MolHolderBase &, const unsigned int) {}
52 
53 template <class Archive>
54 void save(Archive &ar, const RDKit::MolHolder &molholder,
55  const unsigned int version) {
56  RDUNUSED_PARAM(version);
57  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
58 
59  std::int64_t pkl_count = molholder.getMols().size();
60  ar &pkl_count;
61 
62  for (auto &mol : molholder.getMols()) {
63  std::string pkl;
64  RDKit::MolPickler::pickleMol(*mol.get(), pkl);
65  ar << pkl;
66  }
67 }
68 
69 template <class Archive>
70 void load(Archive &ar, RDKit::MolHolder &molholder,
71  const unsigned int version) {
72  RDUNUSED_PARAM(version);
73  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
74 
75  std::vector<boost::shared_ptr<RDKit::ROMol>> &mols = molholder.getMols();
76  mols.clear();
77 
78  std::int64_t pkl_count = -1;
79  ar &pkl_count;
80 
81  for (std::int64_t i = 0; i < pkl_count; ++i) {
82  std::string pkl;
83  ar >> pkl;
84  mols.push_back(boost::make_shared<RDKit::ROMol>(pkl));
85  }
86 }
87 
88 template <class Archive, class MolHolder>
89 void serialize_strings(Archive &ar, MolHolder &molholder,
90  const unsigned int version) {
91  RDUNUSED_PARAM(version);
92  ar &boost::serialization::base_object<RDKit::MolHolderBase>(molholder);
93  ar &molholder.getMols();
94 }
95 
96 template <class Archive>
97 void serialize(Archive &ar, RDKit::CachedMolHolder &molholder,
98  const unsigned int version) {
99  serialize_strings(ar, molholder, version);
100 }
101 
102 template <class Archive>
103 void serialize(Archive &ar, RDKit::CachedSmilesMolHolder &molholder,
104  const unsigned int version) {
105  serialize_strings(ar, molholder, version);
106 }
107 
108 template <class Archive>
109 void serialize(Archive &ar, RDKit::CachedTrustedSmilesMolHolder &molholder,
110  const unsigned int version) {
111  serialize_strings(ar, molholder, version);
112 }
113 
114 template <class Archive>
115 void save(Archive &ar, const RDKit::FPHolderBase &fpholder,
116  const unsigned int version) {
117  RDUNUSED_PARAM(version);
118  std::vector<std::string> pickles;
119  for (auto &fp : fpholder.getFingerprints()) {
120  pickles.push_back(fp->toString());
121  }
122  ar &pickles;
123 }
124 
125 template <class Archive>
126 void load(Archive &ar, RDKit::FPHolderBase &fpholder,
127  const unsigned int version) {
128  RDUNUSED_PARAM(version);
129  std::vector<std::string> pickles;
130  std::vector<ExplicitBitVect *> &fps = fpholder.getFingerprints();
131 
132  ar &pickles;
133  for (size_t i = 0; i < fps.size(); ++i) {
134  delete fps[i];
135  }
136  fps.clear();
137 
138  for (auto &pkl : pickles) {
139  fps.push_back(new ExplicitBitVect(pkl));
140  }
141 }
142 
143 template <class Archive>
144 void serialize(Archive &ar, RDKit::PatternHolder &pattern_holder,
145  const unsigned int version) {
146  RDUNUSED_PARAM(version);
147  ar &boost::serialization::base_object<RDKit::FPHolderBase>(pattern_holder);
148  if (Archive::is_saving::value &&
149  pattern_holder.getNumBits() != RDKit::PatternHolder::defaultNumBits()) {
150  ar &pattern_holder.getNumBits();
151  } else if (Archive::is_loading::value) {
152  try {
153  ar &pattern_holder.getNumBits();
154  } catch (boost::archive::archive_exception &) {
156  }
157  }
158 }
159 
160 template <class Archive>
161 void serialize(Archive &ar, RDKit::TautomerPatternHolder &pattern_holder,
162  const unsigned int version) {
163  RDUNUSED_PARAM(version);
164  ar &boost::serialization::base_object<RDKit::FPHolderBase>(pattern_holder);
165  ar &pattern_holder.getNumBits();
166 }
167 
168 template <class Archive>
169 void serialize(Archive &, RDKit::KeyHolderBase &, const unsigned int) {}
170 
171 template <class Archive>
172 void serialize(Archive &ar, RDKit::KeyFromPropHolder &key_holder,
173  const unsigned int) {
174  ar &boost::serialization::base_object<RDKit::KeyHolderBase>(key_holder);
175  ar &key_holder.getPropName();
176  ar &key_holder.getKeys();
177 }
178 
179 template <class Archive>
180 void registerSubstructLibraryTypes(Archive &ar) {
181  ar.register_type(static_cast<RDKit::MolHolder *>(nullptr));
182  ar.register_type(static_cast<RDKit::CachedMolHolder *>(nullptr));
183  ar.register_type(static_cast<RDKit::CachedSmilesMolHolder *>(nullptr));
184  ar.register_type(static_cast<RDKit::CachedTrustedSmilesMolHolder *>(nullptr));
185  ar.register_type(static_cast<RDKit::PatternHolder *>(nullptr));
186  ar.register_type(static_cast<RDKit::TautomerPatternHolder *>(nullptr));
187  ar.register_type(static_cast<RDKit::KeyFromPropHolder *>(nullptr));
188 }
189 
190 template <class Archive>
191 void save(Archive &ar, const RDKit::SubstructLibrary &slib,
192  const unsigned int version) {
193  RDUNUSED_PARAM(version);
194  registerSubstructLibraryTypes(ar);
195  ar &slib.getSearchOrder();
196  ar &slib.getKeyHolder();
197  ar &slib.getMolHolder();
198  ar &slib.getFpHolder();
199 }
200 
201 template <class Archive>
202 void load(Archive &ar, RDKit::SubstructLibrary &slib,
203  const unsigned int version) {
204  RDUNUSED_PARAM(version);
205  registerSubstructLibraryTypes(ar);
206  if (version > 1) {
207  ar &slib.getSearchOrder();
208  ar &slib.getKeyHolder();
209  }
210  ar &slib.getMolHolder();
211  ar &slib.getFpHolder();
212  slib.resetHolders();
213 }
214 
215 } // end namespace serialization
216 } // end namespace boost
217 
218 BOOST_CLASS_VERSION(RDKit::MolHolder, 1);
219 BOOST_CLASS_VERSION(RDKit::CachedMolHolder, 1);
220 BOOST_CLASS_VERSION(RDKit::CachedSmilesMolHolder, 1);
221 BOOST_CLASS_VERSION(RDKit::CachedTrustedSmilesMolHolder, 1);
222 BOOST_CLASS_VERSION(RDKit::PatternHolder, 1);
223 BOOST_CLASS_VERSION(RDKit::TautomerPatternHolder, 1);
224 BOOST_CLASS_VERSION(RDKit::SubstructLibrary, 2);
225 
226 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::MolHolder);
227 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::FPHolderBase);
228 BOOST_SERIALIZATION_SPLIT_FREE(RDKit::SubstructLibrary);
229 
230 #endif
231 #endif
#define RDUNUSED_PARAM(x)
Definition: Invariant.h:196
a class for bit vectors that are densely occupied
Concrete class that holds binary cached molecules in memory.
Concrete class that holds smiles strings in memory.
Concrete class that holds trusted smiles strings in memory.
Base FPI for the fingerprinter used to rule out impossible matches.
std::vector< ExplicitBitVect * > & getFingerprints()
std::vector< std::string > & getKeys()
Base class API for holding molecules to substructure search.
Concrete class that holds molecules in memory.
std::vector< boost::shared_ptr< ROMol > > & getMols()
static void pickleMol(const ROMol *mol, std::ostream &ss)
pickles a molecule and sends the results to stream ss
static unsigned int defaultNumBits()
const unsigned int & getNumBits() const
Substructure Search a library of molecules.
boost::shared_ptr< MolHolderBase > & getMolHolder()
Get the underlying molecule holder implementation.
boost::shared_ptr< KeyHolderBase > & getKeyHolder()
Get the underlying key holder implementation.
boost::shared_ptr< FPHolderBase > & getFpHolder()
Get the underlying fingerprint holder implementation.
const std::vector< unsigned int > & getSearchOrder() const
void resetHolders()
access required for serialization
Definition: RDLog.h:24