RDKit
Open-source cheminformatics and machine learning.
FileParsers.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_FILEPARSERS_H
12 #define _RD_FILEPARSERS_H
13 
14 #include <RDGeneral/types.h>
15 #include <GraphMol/RDKitBase.h>
16 
17 #include <string>
18 #include <iostream>
19 #include <vector>
20 #include <exception>
21 
22 #include <boost/shared_ptr.hpp>
23 
24 namespace RDKit {
25 const int MOLFILE_MAXLINE = 256;
26 RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig);
27 
29  : public std::exception {
30  public:
31  //! construct with an error message
32  explicit MolFileUnhandledFeatureException(const char *msg) : _msg(msg) {}
33  //! construct with an error message
34  explicit MolFileUnhandledFeatureException(const std::string msg)
35  : _msg(msg) {}
36  //! get the error message
37  const char *what() const noexcept override { return _msg.c_str(); }
38  ~MolFileUnhandledFeatureException() noexcept override = default;
39 
40  private:
41  std::string _msg;
42 };
43 
44 //-----
45 // mol files
46 //-----
47 typedef std::vector<RWMOL_SPTR> RWMOL_SPTR_VECT;
48 // \brief construct a molecule from MDL mol data in a stream
49 /*!
50  * \param inStream - stream containing the data
51  * \param line - current line number (used for error reporting)
52  * \param sanitize - toggles sanitization and stereochemistry
53  * perception of the molecule
54  * \param removeHs - toggles removal of Hs from the molecule. H removal
55  * is only done if the molecule is sanitized
57  * \param strictParsing - if set to false, the parser is more lax about
58  * correctness of the contents.
59  *
60  */
62  unsigned int &line,
63  bool sanitize = true,
64  bool removeHs = true,
65  bool strictParsing = true);
66 // \overload
68  unsigned int &line,
69  bool sanitize = true,
70  bool removeHs = true,
71  bool strictParsing = true);
72 // \brief construct a molecule from an MDL mol block
73 /*!
74  * \param molBlock - string containing the mol block
75  * \param sanitize - toggles sanitization and stereochemistry
76  * perception of the molecule
77  * \param removeHs - toggles removal of Hs from the molecule. H removal
78  * is only done if the molecule is sanitized
79  * \param strictParsing - if set to false, the parser is more lax about
80  * correctness of the contents.
81  */
82 RDKIT_FILEPARSERS_EXPORT RWMol *MolBlockToMol(const std::string &molBlock,
83  bool sanitize = true,
84  bool removeHs = true,
85  bool strictParsing = true);
86 
87 // \brief construct a molecule from an MDL mol file
88 /*!
89  * \param fName - string containing the file name
90  * \param sanitize - toggles sanitization and stereochemistry
91  * perception of the molecule
92  * \param removeHs - toggles removal of Hs from the molecule. H removal
93  * is only done if the molecule is sanitized
94  * \param strictParsing - if set to false, the parser is more lax about
95  * correctness of the contents.
96  */
97 RDKIT_FILEPARSERS_EXPORT RWMol *MolFileToMol(const std::string &fName,
98  bool sanitize = true,
99  bool removeHs = true,
100  bool strictParsing = true);
101 
102 // \brief generates an MDL mol block for a molecule
103 /*!
104  * \param mol - the molecule in question
105  * \param includeStereo - toggles inclusion of stereochemistry information
106  * \param confId - selects the conformer to be used
107  * \param kekulize - triggers kekulization of the molecule before it is
108  * written
109  * \param forceV3000 - force generation of a V3000 mol block (happens
110  * automatically with
111  * more than 999 atoms or bonds)
112  */
114  bool includeStereo = true,
115  int confId = -1,
116  bool kekulize = true,
117  bool forceV3000 = false);
118 
119 // \brief generates an MDL v3000 mol block for a molecule
120 /*!
121  * \param mol - the molecule in question
122  * \param includeStereo - toggles inclusion of stereochemistry information
123  * \param confId - selects the conformer to be used
124  * \param kekulize - triggers kekulization of the molecule before it is
125  * written
126  */
127 inline std::string MolToV3KMolBlock(const ROMol &mol, bool includeStereo = true,
128  int confId = -1, bool kekulize = true) {
129  return MolToMolBlock(mol, includeStereo, confId, kekulize, true);
130 }
131 
132 // \brief Writes a molecule to an MDL mol file
133 /*!
134  * \param mol - the molecule in question
135  * \param fName - the name of the file to use
136  * \param includeStereo - toggles inclusion of stereochemistry information
137  * \param confId - selects the conformer to be used
138  * \param kekulize - triggers kekulization of the molecule before it is
139  * written
140  * \param forceV3000 - force generation of a V3000 mol block (happens
141  * automatically with
142  * more than 999 atoms or bonds)
143  */
145  const ROMol &mol, const std::string &fName, bool includeStereo = true,
146  int confId = -1, bool kekulize = true, bool forceV3000 = false);
147 
148 // \brief Writes a molecule to an MDL V3000 mol file
149 /*!
150  * \param mol - the molecule in question
151  * \param fName - the name of the file to use
152  * \param includeStereo - toggles inclusion of stereochemistry information
153  * \param confId - selects the conformer to be used
154  * \param kekulize - triggers kekulization of the molecule before it is
155  * written
156  */
157 inline void MolToV3KMolFile(const ROMol &mol, const std::string &fName,
158  bool includeStereo = true, int confId = -1,
159  bool kekulize = true) {
160  MolToMolFile(mol, fName, includeStereo, confId, kekulize, true);
161 }
162 
164  int confId = -1,
165  bool kekulize = true);
166 
168  const std::string &fName,
169  int confId = -1,
170  bool kekulize = true);
171 
173  int confId = -1);
174 
176  const std::string &fName,
177  int confId = -1);
178 
179 //-----
180 // TPL handling:
181 //-----
182 
183 //! \brief translate TPL data (BioCad format) into a multi-conf molecule
184 /*!
185  \param inStream: the stream from which to read
186  \param line: used to track the line number of errors
187  \param sanitize: toggles sanitization and stereochemistry
188  perception of the molecule
189  \param skipFirstConf: according to the TPL format description, the atomic
190  coords in the atom-information block describe the first
191 conformation and the first conf block describes the second
192  conformation. The CombiCode, on the other hand, writes
193  the first conformation data both to the atom-information
194  block and to the first conf block. We want to be able to
195  read CombiCode-style tpls, so we'll allow this
196  mis-feature
197  to be parsed when this flag is set.
198 */
200  unsigned int &line,
201  bool sanitize = true,
202  bool skipFirstConf = false);
203 
204 //! \brief construct a multi-conf molecule from a TPL (BioCad format) file
205 /*!
206  \param fName: the name of the file from which to read
207  \param sanitize: toggles sanitization and stereochemistry
208  perception of the molecule
209  \param skipFirstConf: according to the TPL format description, the atomic
210  coords in the atom-information block describe the first
211 conformation and the first conf block describes the second
212  conformation. The CombiCode, on the other hand, writes
213  the first conformation data both to the atom-information
214  block and to the first conf block. We want to be able to
215  read CombiCode-style tpls, so we'll allow this
216  mis-feature
217  to be parsed when this flag is set.
218 */
219 RDKIT_FILEPARSERS_EXPORT RWMol *TPLFileToMol(const std::string &fName,
220  bool sanitize = true,
221  bool skipFirstConf = false);
222 
224  const ROMol &mol, const std::string &partialChargeProp = "_GasteigerCharge",
225  bool writeFirstConfTwice = false);
227  const ROMol &mol, const std::string &fName,
228  const std::string &partialChargeProp = "_GasteigerCharge",
229  bool writeFirstConfTwice = false);
230 
231 //-----
232 // MOL2 handling
233 //-----
234 
235 typedef enum {
236  CORINA = 0 //!< supports output from Corina and some dbtranslate output
238 
239 // \brief construct a molecule from a Tripos mol2 file
240 /*!
241  *
242  * \param fName - string containing the file name
243  * \param sanitize - toggles sanitization of the molecule
244  * \param removeHs - toggles removal of Hs from the molecule. H removal
245  * is only done if the molecule is sanitized
246  * \param variant - the atom type definitions to use
247  * \param cleanupSubstructures - toggles recognition and cleanup of common
248  * substructures
249  */
250 RDKIT_FILEPARSERS_EXPORT RWMol *Mol2FileToMol(const std::string &fName,
251  bool sanitize = true,
252  bool removeHs = true,
253  Mol2Type variant = CORINA,
254  bool cleanupSubstructures = true);
255 
256 // \brief construct a molecule from Tripos mol2 data in a stream
257 /*!
258  * \param inStream - stream containing the data
259  * \param sanitize - toggles sanitization of the molecule
260  * \param removeHs - toggles removal of Hs from the molecule. H removal
261  * is only done if the molecule is sanitized
262  * \param variant - the atom type definitions to use
263  * \param cleanupSubstructures - toggles recognition and cleanup of common
264  * substructures
265  */
267  std::istream *inStream, bool sanitize = true, bool removeHs = true,
268  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
269 // \overload
271  std::istream &inStream, bool sanitize = true, bool removeHs = true,
272  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
273 
274 // \brief construct a molecule from a Tripos mol2 block
275 /*!
276  * \param molBlock - string containing the mol block
277  * \param sanitize - toggles sanitization of the molecule
278  * \param removeHs - toggles removal of Hs from the molecule. H removal
279  * is only done if the molecule is sanitized
280  * \param variant - the atom type definitions to use
281  * \param cleanupSubstructures - toggles recognition and cleanup of common
282  * substructures
283  */
285  const std::string &molBlock, bool sanitize = true, bool removeHs = true,
286  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
287 
289  bool sanitize = true,
290  bool removeHs = true,
291  unsigned int flavor = 0,
292  bool proximityBonding = true);
293 
295  bool sanitize = true,
296  bool removeHs = true,
297  unsigned int flavor = 0,
298  bool proximityBonding = true);
300  std::istream *inStream, bool sanitize = true, bool removeHs = true,
301  unsigned int flavor = 0, bool proximityBonding = true);
303  std::istream &inStream, bool sanitize = true, bool removeHs = true,
304  unsigned int flavor = 0, bool proximityBonding = true);
305 RDKIT_FILEPARSERS_EXPORT RWMol *PDBFileToMol(const std::string &fname,
306  bool sanitize = true,
307  bool removeHs = true,
308  unsigned int flavor = 0,
309  bool proximityBonding = true);
310 
311 // \brief generates a PDB block for a molecule
312 /*!
313  * \param mol - the molecule in question
314  * \param confId - selects the conformer to be used
315  * \param flavor - controls what gets written:
316  * flavor & 1 : Write MODEL/ENDMDL lines around each record
317  * flavor & 2 : Don't write single CONECT records
318  * flavor & 4 : Write CONECT records in both directions
319  * flavor & 8 : Don't use multiple CONECTs to encode bond order
320  * flavor & 16 : Write MASTER record
321  * flavor & 32 : Write TER record
322  */
324  int confId = -1,
325  unsigned int flavor = 0);
326 // \brief Writes a molecule to a PDB file
327 /*!
328  * \param mol - the molecule in question
329  * \param fName - the name of the file to use
330  * \param confId - selects the conformer to be used
331  * \param flavor - controls what gets written:
332  * flavor & 1 : Write MODEL/ENDMDL lines around each record
333  * flavor & 2 : Don't write single CONECT records
334  * flavor & 4 : Write CONECT records in both directions
335  * flavor & 8 : Don't use multiple CONECTs to encode bond order
336  * flavor & 16 : Write MASTER record
337  * flavor & 32 : Write TER record
338  */
340  const std::string &fname,
341  int confId = -1,
342  unsigned int flavor = 0);
343 
344 // \brief reads a molecule from the metadata in an RDKit-generated SVG file
345 /*!
346  * \param svg - string containing the SVG
347  * \param sanitize - toggles sanitization of the molecule
348  * \param removeHs - toggles removal of Hs from the molecule. H removal
349  * is only done if the molecule is sanitized
350  *
351  * **NOTE** This functionality should be considered beta.
352  */
354  bool sanitize = true,
355  bool removeHs = true);
356 /*! \overload
357  */
359  bool sanitize = true,
360  bool removeHs = true);
361 
362 inline std::unique_ptr<RDKit::RWMol> operator"" _ctab(const char *text,
363  size_t len) {
364  std::string data(text, len);
365  RWMol *ptr = nullptr;
366  try {
367  ptr = MolBlockToMol(data);
368  } catch (const RDKit::MolSanitizeException &) {
369  ptr = nullptr;
370  }
371  return std::unique_ptr<RWMol>(ptr);
372 }
373 inline std::unique_ptr<RDKit::RWMol> operator"" _mol2(const char *text,
374  size_t len) {
375  std::string data(text, len);
376  RWMol *ptr = nullptr;
377  try {
378  ptr = Mol2BlockToMol(data);
379  } catch (const RDKit::MolSanitizeException &) {
380  ptr = nullptr;
381  }
382  return std::unique_ptr<RWMol>(ptr);
383 }
384 
385 inline std::unique_ptr<RDKit::RWMol> operator"" _pdb(const char *text,
386  size_t len) {
387  std::string data(text, len);
388  RWMol *ptr = nullptr;
389  try {
390  ptr = PDBBlockToMol(data);
391  } catch (const RDKit::MolSanitizeException &) {
392  ptr = nullptr;
393  }
394  return std::unique_ptr<RWMol>(ptr);
395 }
396 
397 } // namespace RDKit
398 
399 #endif
pulls in the core RDKit functionality
MolFileUnhandledFeatureException(const char *msg)
construct with an error message
Definition: FileParsers.h:32
MolFileUnhandledFeatureException(const std::string msg)
construct with an error message
Definition: FileParsers.h:34
~MolFileUnhandledFeatureException() noexcept override=default
const char * what() const noexcept override
get the error message
Definition: FileParsers.h:37
class for flagging sanitization errors
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:153
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
Std stuff.
Definition: Abbreviations.h:18
std::string MolToV3KMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true)
Definition: FileParsers.h:127
RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig)
RDKIT_FILEPARSERS_EXPORT void MolToMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKIT_FILEPARSERS_EXPORT std::string MolToPDBBlock(const ROMol &mol, int confId=-1, unsigned int flavor=0)
RDKIT_FILEPARSERS_EXPORT RWMol * MolBlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKIT_FILEPARSERS_EXPORT std::string MolToXYZBlock(const ROMol &mol, int confId=-1)
RDKIT_FILEPARSERS_EXPORT void MolToXYZFile(const ROMol &mol, const std::string &fName, int confId=-1)
RDKIT_FILEPARSERS_EXPORT std::string MolToTPLText(const ROMol &mol, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKIT_FILEPARSERS_EXPORT void MolToPDBFile(const ROMol &mol, const std::string &fname, int confId=-1, unsigned int flavor=0)
RDKIT_FILEPARSERS_EXPORT void MolToCMLFile(const ROMol &mol, const std::string &fName, int confId=-1, bool kekulize=true)
RDKIT_FILEPARSERS_EXPORT RWMol * PDBFileToMol(const std::string &fname, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
void MolToV3KMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true)
Definition: FileParsers.h:157
RDKIT_FILEPARSERS_EXPORT RWMol * TPLDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool skipFirstConf=false)
translate TPL data (BioCad format) into a multi-conf molecule
RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKIT_FILEPARSERS_EXPORT std::string MolToCMLBlock(const ROMol &mol, int confId=-1, bool kekulize=true)
RDKIT_FILEPARSERS_EXPORT RWMol * PDBDataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
@ CORINA
Definition: FileParsers.h:236
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2FileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT RWMol * RDKitSVGToMol(const std::string &svg, bool sanitize=true, bool removeHs=true)
RDKIT_FILEPARSERS_EXPORT void MolToTPLFile(const ROMol &mol, const std::string &fName, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2DataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT RWMol * PDBBlockToMol(const char *str, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2BlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKIT_FILEPARSERS_EXPORT RWMol * TPLFileToMol(const std::string &fName, bool sanitize=true, bool skipFirstConf=false)
construct a multi-conf molecule from a TPL (BioCad format) file
RDKIT_FILEPARSERS_EXPORT RWMol * MolDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKIT_FILEPARSERS_EXPORT RWMol * MolFileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
const int MOLFILE_MAXLINE
Definition: FileParsers.h:25
std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT
Definition: FileParsers.h:47
boost::shared_ptr< RWMol > RWMOL_SPTR
Definition: RWMol.h:217