RDKit
Open-source cheminformatics and machine learning.
MolStandardize.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018-2021 Susan H. Leung and other RDKit contributors
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 /*! \file MolStandardize.h
11 
12  \brief Defines the CleanupParameters and some convenience functions.
13 
14 */
15 #include <RDGeneral/export.h>
16 #ifndef RD_MOLSTANDARDIZE_H
17 #define RD_MOLSTANDARDIZE_H
18 
19 #include <string>
20 #include <GraphMol/RDKitBase.h>
21 
22 namespace RDKit {
23 class RWMol;
24 class ROMol;
25 
26 namespace MolStandardize {
27 
28 //! The CleanupParameters structure defines the default parameters for the
29 /// cleanup process and also allows the user to customize the process by
30 /// changing the parameters.
31 /*!
32 
33  <b>Notes:</b>
34  - To customize the parameters, the structure must be initialized first.
35  (This is another item on the TODO list.)
36  - For this project, not all of the parameters have been exposed yet.
37  (TODO)
38 
39 */
41  // TODO reveal all parameters
42  private:
43  const char *rdbase_cstr = std::getenv("RDBASE");
44 
45  public:
46  std::string rdbase = rdbase_cstr != nullptr ? rdbase_cstr : "";
47  std::string normalizations;
48  std::string acidbaseFile;
49  std::string fragmentFile;
50  std::string tautomerTransforms;
51  int maxRestarts{200}; //!< The maximum number of times to attempt to apply the
52  //!< series of normalizations (default 200).
53  bool preferOrganic{false}; //!< Whether to prioritize organic fragments when
54  //!< choosing fragment parent (default false).
55  bool doCanonical{true}; //!< Whether to apply normalizations in a
56  //!< canonical order (default true).
57  int maxTautomers{1000}; //!< The maximum number of tautomers to enumerate
58  //!< (default 1000).
59  int maxTransforms{1000}; //!< The maximum number of tautomer transformations
60  //!< to apply (default 1000).
61  bool tautomerRemoveSp3Stereo{
62  true}; //!< Whether to remove stereochemistry from sp3
63  //!< centers involved in tautomerism (defaults to true)
64  bool tautomerRemoveBondStereo{
65  true}; //!< Whether to remove stereochemistry from double
66  //!< bonds involved in tautomerism (defaults to true)
67  bool tautomerRemoveIsotopicHs{
68  true}; //!< Whether to remove isotopic Hs from centers
69  //!< involved in tautomerism (defaults to true)
70  bool tautomerReassignStereo{
71  true}; //!< Whether enumerate() should call assignStereochemistry
72  //!< on all generated tautomers (defaults to true)
73  bool largestFragmentChooserUseAtomCount{
74  true}; //!< Whether LargestFragmentChooser should use atom
75  //!< count as main criterion before MW (defaults to true)
76  bool largestFragmentChooserCountHeavyAtomsOnly{
77  false}; //!< Whether LargestFragmentChooser should only count
78  //!< heavy atoms (defaults to false)
79  std::vector<std::pair<std::string, std::string>> normalizationData;
80  std::vector<std::pair<std::string, std::string>> fragmentData;
81  std::vector<std::tuple<std::string, std::string, std::string>> acidbaseData;
82  std::vector<std::tuple<std::string, std::string, std::string, std::string>>
85 };
86 
87 RDKIT_MOLSTANDARDIZE_EXPORT extern const CleanupParameters
89 
91  CleanupParameters &params, const std::string &json);
92 
93 //! The cleanup function is equivalent to the
94 /// molvs.Standardizer().standardize(mol) function. It calls the same steps,
95 /// namely: RemoveHs, RDKit SanitizeMol, MetalDisconnector, Normalizer,
96 /// Reionizer, RDKit AssignStereochemistry.
98  const RWMol *mol,
100 //! \overload
101 inline RWMol *cleanup(const RWMol &mol, const CleanupParameters &params =
103  return cleanup(&mol, params);
104 };
105 
106 //! Works the same as Normalizer().normalize(mol)
108  const RWMol *mol,
110 
111 //! Works the same as Reionizer().reionize(mol)
113  const RWMol *mol,
115 
116 //! Works the same as FragmentRemover().remove(mol)
118  const RWMol *mol,
120 
121 //! Works the same as TautomerEnumerator().canonicalize(mol)
123  const RWMol *mol,
125 
126 //! Returns the tautomer parent of a given molecule. The tautomer parent is the
127 /// standardized canonical tautomer of the molecule.
129  const RWMol &mol,
131  bool skipStandardize = false);
132 
133 //! Returns the fragment parent of a given molecule. The fragment parent is the
134 /// largest organic covalent unit in the molecule.
136  const RWMol &mol,
138  bool skip_standardize = false);
139 
140 //! calls removeStereochemistry() on the given molecule
142  const RWMol &mol,
144  bool skip_standardize = false);
145 
146 //! Removes all isotope specifications from the given molecule
148  const RWMol &mol,
150  bool skip_standardize = false);
151 
152 //! Returns the charge parent of a given molecule. The charge parent is the
153 //! uncharged version of the fragment parent.
155  const RWMol &mol,
157  bool skip_standardize = false);
158 
159 //! Returns the super parent. The super parent is the fragment, charge, isotope,
160 //! stereo, and tautomer parent of the molecule.
162  const RWMol &mol,
164  bool skip_standardize = false);
165 
166 //! Convenience function for quickly standardizing a single SMILES string.
167 /// Returns a standardized canonical SMILES string given a SMILES string.
168 /// This is the equivalent of calling cleanup() on each of the molecules
170  const std::string &smiles);
171 
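172 //! TODO: document. Judging by the name and signature, this presumably returns the SMILES of the tautomers enumerated for the input SMILES; confirm against the implementation.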
174  const std::string &smiles,
176 }; // namespace MolStandardize
177 } // namespace RDKit
178 #endif
pulls in the core RDKit functionality
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
#define RDKIT_MOLSTANDARDIZE_EXPORT
Definition: export.h:313
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * removeFragments(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as FragmentRemover().remove(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * tautomerParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skipStandardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * canonicalTautomer(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as TautomerEnumerator().canonicalize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT std::vector< std::string > enumerateTautomerSmiles(const std::string &smiles, const CleanupParameters &params=defaultCleanupParameters)
TODO.
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * cleanup(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * stereoParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
calls removeStereochemistry() on the given molecule
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * fragmentParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * reionize(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Reionizer().reionize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * chargeParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT const CleanupParameters defaultCleanupParameters
Definition: Fragment.h:25
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * isotopeParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
removes all isotopes specifications from the given molecule
RDKIT_MOLSTANDARDIZE_EXPORT std::string standardizeSmiles(const std::string &smiles)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * superParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * normalize(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Normalizer().normalize(mol)
RDKIT_MOLSTANDARDIZE_EXPORT void updateCleanupParamsFromJSON(CleanupParameters &params, const std::string &json)
Std stuff.
Definition: Abbreviations.h:18
std::vector< std::tuple< std::string, std::string, std::string, std::string > > tautomerTransformData
std::vector< std::tuple< std::string, std::string, std::string > > acidbaseData
std::vector< std::pair< std::string, std::string > > fragmentData
std::vector< std::pair< std::string, std::string > > normalizationData