RDKit
Open-source cheminformatics and machine learning.
MolOps.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2001-2021 Greg Landrum and Rational Discovery LLC
3 // Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 #include <RDGeneral/export.h>
12 #ifndef _RD_MOL_OPS_H_
13 #define _RD_MOL_OPS_H_
14 
15 #include <vector>
16 #include <map>
17 #include <list>
19 #include <boost/smart_ptr.hpp>
20 #include <boost/dynamic_bitset.hpp>
22 #include <RDGeneral/types.h>
23 #include "SanitException.h"
24 
25 RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
26 namespace RDKit {
27 class ROMol;
28 class RWMol;
29 class Atom;
30 class Bond;
31 class Conformer;
32 typedef std::vector<double> INVAR_VECT;  //!< a vector of double values
33 typedef INVAR_VECT::iterator INVAR_VECT_I;  //!< mutable iterator over an INVAR_VECT
34 typedef INVAR_VECT::const_iterator INVAR_VECT_CI;  //!< read-only iterator over an INVAR_VECT
35 
36 //! \brief Groups a variety of molecular query and transformation operations.
37 namespace MolOps {
38 
39 //! return the number of electrons available on an atom to donate for
40 /// aromaticity
41 /*!
42  The result is determined using the default valency, number of lone pairs,
43  number of bonds and the formal charge. Note that the atom may not donate
44  all of these electrons to a ring for aromaticity (also used in Conjugation
45  and hybridization code).
46 
47  \param at the atom of interest
48 
49  \return the number of electrons
50 */
52 
53 //! sums up all atomic formal charges and returns the result
55 
56 //! returns whether or not the given Atom is involved in a conjugated bond
58 
59 //! find fragments (disconnected components of the molecular graph)
60 /*!
61 
62  \param mol the molecule of interest
63  \param mapping used to return the mapping of Atoms->fragments.
64  On return \c mapping will be <tt>mol.getNumAtoms()</tt> long
65  and will contain the fragment assignment for each Atom.
66 
67  \return the number of fragments found.
68 
69 */
70 RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
71  std::vector<int> &mapping);
72 //! find fragments (disconnected components of the molecular graph)
73 /*!
74 
75  \param mol the molecule of interest
76  \param frags used to return the Atoms in each fragment
77  On return \c frags will be \c numFrags long, and each entry
78  will contain the indices of the Atoms in that fragment.
79 
80  \return the number of fragments found.
81 
82 */
84  const ROMol &mol, std::vector<std::vector<int>> &frags);
85 
86 //! splits a molecule into its component fragments
87 /// (disconnected components of the molecular graph)
88 /*!
89 
90  \param mol the molecule of interest
91  \param sanitizeFrags toggles sanitization of the fragments after
92  they are built (default: \c true)
93  \param frags (optional) used to return the mapping of Atoms->fragments.
94  If provided, \c frags will be <tt>mol.getNumAtoms()</tt> long
95  on return and will contain the fragment assignment for each Atom.
96  \param fragsMolAtomMapping (optional) used to return the Atoms in each fragment.
97  If provided, on return \c fragsMolAtomMapping will be \c numFrags long, and each entry
98  will contain the indices of the Atoms in that fragment.
99  \param copyConformers toggles copying conformers of the fragments after
100  they are built (default: \c true)
101  \return a vector of the fragments as smart pointers to ROMols
102 
103 */
104 RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
105  const ROMol &mol, bool sanitizeFrags = true,
106  std::vector<int> *frags = nullptr,
107  std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
108  bool copyConformers = true);
109 
110 //! splits a molecule into pieces based on labels assigned using a query
111 /*!
112  \tparam T the label type returned by \c query; also the key type of the returned map
113  \param mol the molecule of interest
114  \param query the query used to "label" the molecule for fragmentation (a function pointer taking the molecule and an Atom and returning a \c T)
115  \param sanitizeFrags toggles sanitization of the fragments after
116  they are built (default: \c true)
117  \param whiteList (optional) if provided, only labels in the list will be kept
118  \param negateList if true, the white list logic will be inverted: only labels
119  not in the list will be kept (default: \c false)
120 
121  \return a map of the fragments and their labels
122 
123 */
124 template <typename T>
125 RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
126 getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
127  bool sanitizeFrags = true,
128  const std::vector<T> *whiteList = nullptr,
129  bool negateList = false);
130 
131 #if 0
132  //! finds a molecule's minimum spanning tree (MST)
133  /*! NOTE: this declaration is currently compiled out by the enclosing <tt>\#if 0</tt>.
134  \param mol the molecule of interest
135  \param mst used to return the MST as a vector of bond indices
136  */
137  RDKIT_GRAPHMOL_EXPORT void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
138 #endif
139 
140 //! \name Dealing with hydrogens
141 //@{
142 
143 //! returns a copy of a molecule with hydrogens added in as explicit Atoms
144 /*!
145  \param mol the molecule to add Hs to
146  \param explicitOnly (optional) if this is \c true, only explicit Hs will be
147  added (default: \c false)
148  \param addCoords (optional) if this is \c true, estimates for the atomic
149  coordinates of the added Hs will be used
150  (default: \c false).
151  \param onlyOnAtoms (optional) if provided, this should be a vector of
152  IDs of the atoms that will be considered for H addition.
153  \param addResidueInfo (optional) if this is \c true, add residue info to
154  hydrogen atoms (useful for PDB files) (default: \c false).
155 
156  \return the new molecule
157 
158  <b>Notes:</b>
159  - it makes no sense to use the \c addCoords option if the molecule's
160  heavy atoms
161  don't already have coordinates.
162  - the caller is responsible for <tt>delete</tt>ing the pointer this
163  returns.
164  */
165 RDKIT_GRAPHMOL_EXPORT ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
166  bool addCoords = false,
167  const UINT_VECT *onlyOnAtoms = nullptr,
168  bool addResidueInfo = false);
169 //! \overload
170 /// modifies the molecule in place (nothing is returned); takes the same optional flags as the copying version
171 RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, bool explicitOnly = false,
172  bool addCoords = false,
173  const UINT_VECT *onlyOnAtoms = nullptr,
174  bool addResidueInfo = false);
175 
176 //! Sets Cartesian coordinates for a terminal atom.
177 //! Useful for growing an atom off a molecule with sensible
178 //! coordinates based on the geometry of the neighbor.
179 /*!
180  NOTE: this sets appropriate coordinates in all of the molecule's conformers.
181  \param mol the molecule the atoms belong to
182  \param idx index of the terminal atom whose coordinates are set
183  \param otherIdx index of the bonded neighbor atom
184 */
185 
187  unsigned int otherIdx);
188 
189 //! returns a copy of a molecule with hydrogens removed
190 /*!
191  \param mol the molecule to remove Hs from
192  \param implicitOnly (optional) if this is \c true, only implicit Hs will be
193  removed
194  \param updateExplicitCount (optional) If this is \c true, when explicit Hs
195  are removed
196  from the graph, the heavy atom to which they are bound will have its
197  counter of
198  explicit Hs increased.
199  \param sanitize (optional) If this is \c true, the final molecule will be
200  sanitized
201 
202  \return the new molecule
203 
204  <b>Notes:</b>
205  - Hydrogens which aren't connected to a heavy atom will not be
206  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
207  all atoms removed.
208  - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
209  will not be removed.
210  - two coordinate Hs, like the central H in C[H-]C, will not be removed
211  - Hs connected to dummy atoms will not be removed
212  - Hs that are part of the definition of double bond Stereochemistry
213  will not be removed
214  - Hs that are not connected to anything else will not be removed
215  - Hs that have a query defined (i.e. hasQuery() returns true) will not
216  be removed
217 
218  - the caller is responsible for <tt>delete</tt>ing the pointer this
219  returns.
220 */
221 
223  bool implicitOnly = false,
224  bool updateExplicitCount = false,
225  bool sanitize = true);
226 //! \overload
227 /// modifies the molecule in place (nothing is returned); defaults: implicitOnly = false, updateExplicitCount = false, sanitize = true
228 RDKIT_GRAPHMOL_EXPORT void removeHs(RWMol &mol, bool implicitOnly = false,
229  bool updateExplicitCount = false,
230  bool sanitize = true);
232  bool removeDegreeZero = false; /**< hydrogens that have no bonds */
233  bool removeHigherDegrees = false; /**< hydrogens with two (or more) bonds */
234  bool removeOnlyHNeighbors =
235  false; /**< hydrogens with bonds only to other hydrogens */
236  bool removeIsotopes = false; /**< hydrogens with non-default isotopes */
237  bool removeAndTrackIsotopes = false; /**< removes hydrogens with non-default
238  isotopes and keeps track of the heavy atom the isotopes were attached to in
239  the private _isotopicHs atom property, so they are re-added by AddHs() as the
240  original isotopes if possible*/
241  bool removeDummyNeighbors =
242  false; /**< hydrogens with at least one dummy-atom neighbor */
243  bool removeDefiningBondStereo =
244  false; /**< hydrogens defining bond stereochemistry */
245  bool removeWithWedgedBond = true; /**< hydrogens with wedged bonds to them */
246  bool removeWithQuery = false; /**< hydrogens with queries defined */
247  bool removeMapped = true; /**< mapped hydrogens */
248  bool removeInSGroups = false; /**< part of a SubstanceGroup */
249  bool showWarnings = true; /**< display warnings for Hs that are not removed */
250  bool removeNonimplicit = true; /**< DEPRECATED equivalent of !implicitOnly */
251  bool updateExplicitCount =
252  false; /**< DEPRECATED equivalent of updateExplicitCount */
253  bool removeHydrides = true; /**< Removing Hydrides */
254 };
255 //! \overload
256 /// modifies the molecule in place
258  bool sanitize = true);
259 //! \overload
260 /// The caller owns the pointer this returns
262  const RemoveHsParameters &ps,
263  bool sanitize = true);
264 
265 //! removes all Hs from a molecule
/// This version takes an RWMol by reference and returns nothing.
/// NOTE(review): \c sanitize (default \c true) presumably toggles sanitization
/// of the result, as documented for removeHs() - confirm against the implementation.
266 RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize = true);
267 //! \overload
268 /// The caller owns the pointer this returns
270  bool sanitize = true);
271 
272 //! returns a copy of a molecule with hydrogens removed and added as queries
273 //! to the heavy atoms to which they are bound.
274 /*!
275  This is really intended to be used with molecules that contain QueryAtoms
276 
277  \param mol the molecule to remove Hs from
278 
279  \return the new molecule
280 
281  <b>Notes:</b>
282  - Atoms that do not already have hydrogen count queries will have one
283  added, other H-related queries will not be touched. Examples:
284  - C[H] -> [C;!H0]
285  - [C;H1][H] -> [C;H1]
286  - [C;H2][H] -> [C;H2]
287  - Hydrogens which aren't connected to a heavy atom will not be
288  removed. This prevents molecules like <tt>"[H][H]"</tt> from having
289  all atoms removed.
290  - the caller is responsible for <tt>delete</tt>ing the pointer this
291  returns.
292  - By default all hydrogens are removed, however if
293  mergeUnmappedOnly is true, any hydrogen participating
294  in an atom map will be retained
295 
296 */
298  bool mergeUnmappedOnly = false);
299 //! \overload
300 /// modifies the molecule in place
302  bool mergeUnmappedOnly = false);
303 
304 typedef enum {
311  ADJUST_IGNOREALL = 0xFFFFFFF
313 
314 //! Parameters controlling the behavior of MolOps::adjustQueryProperties
315 /*!
316 
317  Note that some of the options here are either directly contradictory or make
318  no sense when combined with each other. We generally assume that client code
319  is doing something sensible and don't attempt to detect possible conflicts or
320  problems.
321 
322 */
324  bool adjustDegree = true; /**< add degree queries */
325  std::uint32_t adjustDegreeFlags = ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS;
326 
327  bool adjustRingCount = false; /**< add ring-count queries */
328  std::uint32_t adjustRingCountFlags =
330 
331  bool makeDummiesQueries = true; /**< convert dummy atoms without isotope
332  labels to any-atom queries */
333 
334  bool aromatizeIfPossible = true; /**< perceive and set aromaticity */
335 
336  bool makeBondsGeneric =
337  false; /**< convert bonds to generic queries (any bonds) */
338  std::uint32_t makeBondsGenericFlags = ADJUST_IGNORENONE;
339 
340  bool makeAtomsGeneric =
341  false; /**< convert atoms to generic queries (any atoms) */
342  std::uint32_t makeAtomsGenericFlags = ADJUST_IGNORENONE;
343 
344  bool adjustHeavyDegree = false; /**< adjust the heavy-atom degree instead of
345  overall degree */
346  std::uint32_t adjustHeavyDegreeFlags =
348 
349  bool adjustRingChain = false; /**< add ring-chain queries */
350  std::uint32_t adjustRingChainFlags = ADJUST_IGNORENONE;
351 
352  bool useStereoCareForBonds =
353  false; /**< remove stereochemistry info from double bonds that do not have
354  the stereoCare property set */
355 
356  bool adjustConjugatedFiveRings =
357  false; /**< sets bond queries in conjugated five-rings to
358  SINGLE|DOUBLE|AROMATIC */
359 
360  bool setMDLFiveRingAromaticity =
361  false; /**< uses the 5-ring aromaticity behavior of the (former) MDL
362  software as documented in the Chemical Representation Guide */
363 
364  bool adjustSingleBondsToDegreeOneNeighbors =
365  false; /**< sets single bonds between aromatic or conjugated atoms and
366  degree one neighbors to SINGLE|AROMATIC */
367 
368  bool adjustSingleBondsBetweenAromaticAtoms =
369  false; /**< sets non-ring single bonds between two aromatic or conjugated
370  atoms to SINGLE|AROMATIC */
371  //! \brief returns an AdjustQueryParameters object with all adjustments
372  //! disabled
375  res.adjustDegree = false;
376  res.makeDummiesQueries = false;
377  res.aromatizeIfPossible = false;
378  return res;
379  }
381 };
382 
383 //! updates an AdjustQueryParameters object from a JSON string
385  MolOps::AdjustQueryParameters &p, const std::string &json);
386 
387 //! returns a copy of a molecule with query properties adjusted
388 /*!
389  \param mol the molecule to adjust
390  \param params controls the adjustments made
391 
392  \return the new molecule, the caller owns the memory
393 */
395  const ROMol &mol, const AdjustQueryParameters *params = nullptr);
396 //! \overload
397 /// modifies the molecule in place
399  RWMol &mol, const AdjustQueryParameters *params = nullptr);
400 
401 //! returns a copy of a molecule with the atoms renumbered
402 /*!
403 
404  \param mol the molecule to work with
405  \param newOrder the new ordering of the atoms (should be numAtoms long)
406  for example: if newOrder is [3,2,0,1], then atom 3 in the original
407  molecule will be atom 0 in the new one
408 
409  \return the new molecule
410 
411  <b>Notes:</b>
412  - the caller is responsible for <tt>delete</tt>ing the pointer this
413  returns.
414 
415 */
417  const ROMol &mol, const std::vector<unsigned int> &newOrder);
418 
419 //@}
420 
421 //! \name Sanitization
422 /// {
423 
424 typedef enum {
436  SANITIZE_ALL = 0xFFFFFFF
438 
439 //! \brief carries out a collection of tasks for cleaning up a molecule and
440 /// ensuring
441 //! that it makes "chemical sense"
442 /*!
443  This function calls the following in sequence
444  -# MolOps::cleanUp()
445  -# mol.updatePropertyCache()
446  -# MolOps::symmetrizeSSSR()
447  -# MolOps::Kekulize()
448  -# MolOps::assignRadicals()
449  -# MolOps::setAromaticity()
450  -# MolOps::setConjugation()
451  -# MolOps::setHybridization()
452  -# MolOps::cleanupChirality()
453  -# MolOps::adjustHs()
454 
455  \param mol : the RWMol to be cleaned
456 
457  \param operationThatFailed : the first (if any) sanitization operation that
458  fails is set here.
459  The values are taken from the \c SanitizeFlags
460  enum. On success, the value is \c
461  SanitizeFlags::SANITIZE_NONE
462 
463  \param sanitizeOps : the bits here are used to set which sanitization
464  operations are carried out. The elements of the \c
465  SanitizeFlags enum define the operations.
466 
467  <b>Notes:</b>
468  - If there is a failure in the sanitization, a \c MolSanitizeException
469  will be thrown.
470  - in general the user of this function should cast the molecule following
471  this function to a ROMol, so that new atoms and bonds cannot be added to
472  the molecule and screw up the sanitizing that has been done here
473 */
475  unsigned int &operationThatFailed,
476  unsigned int sanitizeOps = SANITIZE_ALL);
477 //! \overload
479 
480 //! \brief Identifies chemistry problems (things that don't make chemical
481 //! sense) in a molecule
482 /*!
483  This function uses the operations in sanitizeMol but does not change
484  the input structure and returns a list of the problems encountered instead
485  of stopping at the first failure.
486 
487  The problems this looks for come from the sanitization operations:
488  -# mol.updatePropertyCache() : Unreasonable valences
489  -# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not
490  in rings, aromatic bonds to non-aromatic atoms.
491 
492  \param mol : the ROMol to be cleaned
493 
494  \param sanitizeOps : the bits here are used to set which sanitization
495  operations are carried out. The elements of the \c
496  SanitizeFlags enum define the operations.
497 
498  \return a vector of \c MolSanitizeException values that indicate what
499  problems were encountered
500 
501 */
503 std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
504  const ROMol &mol, unsigned int sanitizeOps = SANITIZE_ALL);
505 
506 //! Possible aromaticity models
507 /*!
508 - \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
509 - \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit
510 Book)
511 - \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
512 does not consider the outer envelope of fused rings)
513 - \c AROMATICITY_MDL
514 - \c AROMATICITY_CUSTOM uses a caller-provided function
515 */
516 typedef enum {
517  AROMATICITY_DEFAULT = 0x0, ///< future proofing
521  AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function
523 
524 //! Sets up the aromaticity for a molecule
525 /*!
526 
527  This is what happens here:
528  -# find all the simple rings by calling the findSSSR function
529  -# loop over all the Atoms in each ring and mark them if they are
530  candidates
531  for aromaticity. A ring atom is a candidate if it can spare electrons
532  to the ring and if it's from the first two rows of the periodic table.
533  -# based on the candidate atoms, mark the rings to be either candidates
534  or non-candidates. A ring is a candidate only if all its atoms are
535  candidates
536  -# apply Hueckel rule to each of the candidate rings to check if the ring
537  can be
538  aromatic
539 
540  \param mol the RWMol of interest
541  \param model the aromaticity model to use
542  \param func a custom function for assigning aromaticity (only used when
543  model=\c AROMATICITY_CUSTOM)
544 
545  \return >0 on success, <= 0 otherwise
546 
547  <b>Assumptions:</b>
548  - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
549  been called)
550 
551 */
554  int (*func)(RWMol &) = nullptr);
555 
556 //! Designed to be called by the sanitizer to handle special cases before
557 /// anything is done.
558 /*!
559 
560  Currently this:
561  - modifies nitro groups, so that the nitrogen does not have an
562  unreasonable valence of 5, as follows:
563  - the nitrogen gets a positive charge
564  - one of the oxygens gets a negative charge and the double bond to
565  this oxygen is changed to a single bond The net result is that nitro groups
566  can be counted on to be: \c "[N+](=O)[O-]"
567  - modifies halogen-oxygen containing species as follows:
568  \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
569  \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
570  \c [Cl,Br,I](=O)O -> [X+]([O-])O
571  - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
572 
573  \param mol the molecule of interest
574 
575 */
577 
578 //! Called by the sanitizer to assign radical counts to atoms
580 
581 //! adjust the number of implicit and explicit Hs for special cases
582 /*!
583 
584  Currently this:
585  - modifies aromatic nitrogens so that, when appropriate, they have an
586  explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1")
587 
588  \param mol the molecule of interest
589 
590  <b>Assumptions</b>
591  - this is called after the molecule has been sanitized,
592  aromaticity has been perceived, and the implicit valence of
593  everything has been calculated.
594 
595 */
597 
598 //! Kekulizes the molecule
599 /*!
600 
601  \param mol the molecule of interest (modified by this call, see Notes)
602 
603  \param markAtomsBonds if this is set to true (the default), the \c isAromatic
604  flags on both the Bonds and Atoms are set to false following the Kekulization,
605  otherwise they are left alone in their original state.
606 
607  \param maxBackTracks the maximum number of attempts at back-tracking (default: 100).
608  The algorithm uses a back-tracking procedure to revisit a previous
609  double-bond assignment if we hit a wall in the kekulization process.
610 
611  <b>Notes:</b>
612  - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
613  bonds will be changed from \c RDKit::Bond::AROMATIC to \c
614  RDKit::Bond::SINGLE or \c RDKit::Bond::DOUBLE during Kekulization.
615 
616 */
617 RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds = true,
618  unsigned int maxBackTracks = 100);
619 //! Kekulizes the molecule if possible. If the kekulization fails the molecule
620 //! will not be modified
621 /*!
622 
623  \param mol the molecule of interest
624 
625  \param markAtomsBonds if this is set to true, \c isAromatic boolean settings
626  on both the Bonds and Atoms are turned to false following the Kekulization,
627  otherwise they are left alone in their original state.
628 
629  \param maxBackTracks the maximum number of attempts at back-tracking. The
630  algorithm uses a back-tracking procedure to revisit a previous setting of
631  double bond if we hit a wall in the kekulization process
632 
633  \returns whether or not the kekulization succeeded
634 
635  <b>Notes:</b>
636  - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
637  bonds will be changed from \c RDKit::Bond::AROMATIC to \c
638  RDKit::Bond::SINGLE or RDKit::Bond::DOUBLE during Kekulization.
639 
640 */
642  bool markAtomsBonds = true,
643  unsigned int maxBackTracks = 100);
644 
645 //! flags the molecule's conjugated bonds
647 
648 //! calculates and sets the hybridization of all a molecule's Atoms
650 
651 // @}
652 
653 //! \name Ring finding and SSSR
654 //@{
655 
656 //! finds a molecule's Smallest Set of Smallest Rings
657 /*!
658  Currently this implements a modified form of Figueras algorithm
659  (JCICS - Vol. 36, No. 5, 1996, 986-991)
660 
661  \param mol the molecule of interest
662  \param res used to return the vector of rings. Each entry is a vector with
663  atom indices. This information is also stored in the molecule's
664  RingInfo structure, so this argument is optional (see overload)
665 
666  \return number of smallest rings found
667 
668  Base algorithm:
669  - The original algorithm starts by finding representative degree 2
670  nodes.
671  - Representative because if a series of deg 2 nodes are found only
672  one of them is picked.
673  - The smallest ring around each of them is found.
674  - The bonds that connect to this degree 2 node are then chopped off,
675  yielding
676  new deg two nodes
677  - The process is repeated on the new deg 2 nodes.
678  - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
679  with it is found. A bond from this is "carefully" (look in the paper)
680  selected and chopped, yielding deg 2 nodes. The process is same as
681  above once this is done.
682 
683  Our Modifications:
684  - If available, more than one smallest ring around a representative deg 2
685  node will be computed and stored
686  - Typically 3 rings are found around a degree 3 node (when no deg 2s are
687  available)
688  and all the bonds to that node are chopped.
689  - The extra rings that were found in this process are removed after all
690  the nodes have been covered.
691 
692  These changes were motivated by several factors:
693  - We believe the original algorithm fails to find the correct SSSR
694  (finds the correct number of them but the wrong ones) on some sample
695  mols
696  - Since SSSR may not be unique, a post-SSSR step to symmetrize may be
697  done. The extra rings this process adds can be quite useful.
698 */
700  std::vector<std::vector<int>> &res);
701 //! \overload
703  const ROMol &mol, std::vector<std::vector<int>> *res = nullptr);
704 
705 //! use a DFS algorithm to identify ring bonds and atoms in a molecule
706 /*!
707  \b NOTE: though the RingInfo structure is populated by this function,
708  the only really reliable calls that can be made are to check if
709  mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
710  return values >0
711 */
713 
715 
716 //! symmetrize the molecule's Smallest Set of Smallest Rings
717 /*!
718  SSSR rings obtained from "findSSSR" can be non-unique in some cases.
719  For example, cubane has five SSSR rings, not six as one would hope.
720 
721  This function adds additional rings to the SSSR list if necessary
722  to make the list symmetric, e.g. all atoms in cubane will be part of the
723  same number of SSSRs. This function chooses these extra rings from the extra
724  rings computed and discarded during findSSSR. The new rings are chosen such
725  that:
726  - replacing a same sized ring in the SSSR list with an extra ring yields
727  the same union of bond IDs as the original SSSR list
728 
729  \param mol - the molecule of interest
730  \param res used to return the vector of rings. Each entry is a vector with
731  atom indices. This information is also stored in the molecule's
732  RingInfo structure, so this argument is optional (see overload)
733 
734  \return the total number of rings = (new rings + old SSSRs)
735 
736  <b>Notes:</b>
737  - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
738  first
739 */
741  std::vector<std::vector<int>> &res);
742 //! \overload
744 
745 //@}
746 
747 //! \name Shortest paths and other matrices
748 //@{
749 
750 //! returns a molecule's adjacency matrix
751 /*!
752  \param mol the molecule of interest
753  \param useBO toggles use of bond orders in the matrix
754  \param emptyVal sets the empty value (for non-adjacent atoms)
755  \param force forces calculation of the matrix, even if already
756  computed
757  \param propNamePrefix used to set the cached property name
758 
759  \return the adjacency matrix.
760 
761  <b>Notes</b>
762  - The result of this is cached in the molecule's local property
763  dictionary, which will handle deallocation. The caller should <b>not</b> \c
764  delete this pointer.
765 
766 */
768  const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
769  const char *propNamePrefix = nullptr,
770  const boost::dynamic_bitset<> *bondsToUse = nullptr);
771 
772 //! Computes the molecule's topological distance matrix
773 /*!
774  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
775 
776  \param mol the molecule of interest
777  \param useBO toggles use of bond orders in the matrix
778  \param useAtomWts sets the diagonal elements of the result to
779  6.0/(atomic number) so that the matrix can be used to calculate
780  Balaban J values. This does not affect the bond weights.
781  \param force forces calculation of the matrix, even if already
782  computed
783  \param propNamePrefix used to set the cached property name
784 
785  \return the distance matrix.
786 
787  <b>Notes</b>
788  - The result of this is cached in the molecule's local property
789  dictionary, which will handle deallocation. The caller should <b>not</b> \c
790  delete this pointer.
791 
792 
793 */
795  const ROMol &mol, bool useBO = false, bool useAtomWts = false,
796  bool force = false, const char *propNamePrefix = nullptr);
797 
798 //! Computes the molecule's topological distance matrix
799 /*!
800  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
801 
802  \param mol the molecule of interest
803  \param activeAtoms only elements corresponding to these atom indices
804  will be included in the calculation
805  \param bonds only bonds found in this list will be included in the
806  calculation
807  \param useBO toggles use of bond orders in the matrix
808  \param useAtomWts sets the diagonal elements of the result to
809  6.0/(atomic number) so that the matrix can be used to calculate
810  Balaban J values. This does not affect the bond weights.
811 
812  \return the distance matrix.
813 
814  <b>Notes</b>
815  - The results of this call are not cached, the caller <b>should</b> \c
816  delete
817  this pointer.
818 
819 
820 */
822  const ROMol &mol, const std::vector<int> &activeAtoms,
823  const std::vector<const Bond *> &bonds, bool useBO = false,
824  bool useAtomWts = false);
825 
826 //! Computes the molecule's 3D distance matrix
827 /*!
828 
829  \param mol the molecule of interest
830  \param confId the conformer to use
831  \param useAtomWts sets the diagonal elements of the result to
832  6.0/(atomic number)
833  \param force forces calculation of the matrix, even if already
834  computed
835  \param propNamePrefix used to set the cached property name
836  (if set to an empty string, the matrix will not be
837  cached)
838 
839  \return the distance matrix.
840 
841  <b>Notes</b>
842  - If propNamePrefix is not empty the result of this is cached in the
843  molecule's local property dictionary, which will handle deallocation.
844  In other cases the caller is responsible for freeing the memory.
845 
846 */
848  const ROMol &mol, int confId = -1, bool useAtomWts = false,
849  bool force = false, const char *propNamePrefix = nullptr);
850 //! Find the shortest path between two atoms
851 /*!
852  Uses the Bellman-Ford algorithm.
853 
854  \param mol the molecule of interest
855  \param aid1 index of the first atom (start of the path)
856  \param aid2 index of the second atom (end of the path)
857 
858  \return a std::list<int> with the indices of the atoms along the shortest
859  path
860 
861  <b>Notes:</b>
862  - the starting and end atoms are included in the path
863  - if no path is found, an empty path is returned
864 
865 */
866 RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
867  int aid2);
868 
869 //@}
870 
871 //! \name Stereochemistry
872 //@{
873 
874 //! removes bogus chirality markers (those on non-sp3 centers):
876 
877 //! \brief Uses a conformer to assign ChiralType to a molecule's atoms
878 /*!
879  \param mol the molecule of interest
880  \param confId the conformer to use
881  \param replaceExistingTags if this flag is true, any existing atomic chiral
882  tags will be replaced
883 
884  If the conformer provided is not a 3D conformer, nothing will be done.
885 */
887  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
888 
889 //! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
890 //! stereo flags to its bonds
891 /*!
892 
893  \param mol the molecule of interest
894  \param confId the conformer to use
895  \param replaceExistingTags if this flag is true, any existing info about
896  stereochemistry will be replaced
897 
898  If the conformer provided is not a 3D conformer, nothing will be done.
899 */
901  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
902 
903 //! \brief Use bond directions to assign ChiralTypes to a molecule's atoms and
904 //! stereo flags to its bonds
905 /*!
906 
907  \param mol the molecule of interest
908  \param confId the conformer to use
909  \param replaceExistingTags if this flag is true, any existing info about
910  stereochemistry will be replaced
911 */
913  ROMol &mol, int confId = -1, bool replaceExistingTags = true);
914 
915 //! \deprecated: this function will be removed in a future release. Use
916 //! setDoubleBondNeighborDirections() instead
918  int confId = -1);
919 //! Sets bond directions based on double bond stereochemistry
921  ROMol &mol, const Conformer *conf = nullptr);
922 
923 //! Assign CIS/TRANS bond stereochemistry tags based on neighboring directions
925 
926 //! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
927 /*!
928  Does the CIP stereochemistry assignment for the molecule's atoms
929  (R/S) and double bond (Z/E). Chiral atoms will have a property
930  '_CIPCode' indicating their chiral code.
931 
932  \param mol the molecule to use
933  \param cleanIt if true, any existing values of the property `_CIPCode`
934  will be cleared, atoms with a chiral specifier that aren't
935  actually chiral (e.g. atoms with duplicate
936  substituents or only 2 substituents, etc.) will have
937  their chiral code set to CHI_UNSPECIFIED. Bonds with
938  STEREOCIS/STEREOTRANS specified that have duplicate
939  substituents based upon the CIP atom ranks will be
940  marked STEREONONE.
941  \param force causes the calculation to be repeated even if it has
942  already been done
943  \param flagPossibleStereoCenters set the _ChiralityPossible property on
944  atoms that are possible stereocenters
945 
946  <b>Notes:</b>
947  - Throughout we assume that we're working with a hydrogen-suppressed
948  graph.
949 
950 */
952  ROMol &mol, bool cleanIt = false, bool force = false,
953  bool flagPossibleStereoCenters = false);
954 //! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
955 /// (i.e. Z/E)
956 /*!
957 
958  \param mol the molecule of interest
959 */
961 
962 //! \brief finds bonds that could be cis/trans in a molecule and mark them as
963 //! Bond::STEREOANY.
964 /*!
965  \param mol the molecule of interest
966  \param cleanIt toggles removal of stereo flags from double bonds that can
967  not have stereochemistry
968 
969  This function finds any double bonds that can potentially be part of
970  a cis/trans system. No attempt is made here to mark them cis or
971  trans. No attempt is made to detect double bond stereo in ring systems.
972 
973  This function is useful in the following situations:
974  - when parsing a mol file; for the bonds marked here, coordinate
975  information on the neighbors can be used to identify cis or trans states
976  - when writing a mol file; bonds that can be cis/trans but not marked as
977  either need to be specially marked in the mol file
978  - finding double bonds with unspecified stereochemistry so they
979  can be enumerated for downstream 3D tools
980 
981  The CIPranks on the neighboring atoms are checked in this function. The
982  _CIPCode property if set to any on the double bond.
983 */
985  bool cleanIt = false);
986 //! \brief Uses the molParity atom property to assign ChiralType to a molecule's
987 //! atoms
988 /*!
989  \param mol the molecule of interest
990  \param replaceExistingTags if this flag is true, any existing atomic chiral
991  tags will be replaced
992 */
994  ROMol &mol, bool replaceExistingTags = true);
995 
996 //@}
997 
998 //! returns the number of atoms which have a particular property set
1000  const ROMol &mol, std::string prop);
1001 
1002 //! returns whether or not a molecule needs to have Hs added to it.
1004 
1005 namespace details {
1006 //! not recommended for use in other code
1008  RWMol &mol, const boost::dynamic_bitset<> &atomsToUse,
1009  const boost::dynamic_bitset<> &bondsToUse, bool markAtomsBonds = true,
1010  unsigned int maxBackTracks = 100);
1011 } // namespace details
1012 
1013 } // namespace MolOps
1014 } // namespace RDKit
1015 
1016 #endif
RDKIT_GRAPHMOL_EXPORT const int ci_LOCAL_INF
The class for representing atoms.
Definition: Atom.h:68
The class for representing 2D or 3D conformation of a molecule.
Definition: Conformer.h:45
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:217
RDKIT_GRAPHMOL_EXPORT void KekulizeFragment(RWMol &mol, const boost::dynamic_bitset<> &atomsToUse, const boost::dynamic_bitset<> &bondsToUse, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
not recommended for use in other code
RDKIT_GRAPHMOL_EXPORT double * get3DDistanceMat(const ROMol &mol, int confId=-1, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's 3D distance matrix.
RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol)
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(ROMol &mol, bool cleanIt=false, bool force=false, bool flagPossibleStereoCenters=false)
Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
RDKIT_GRAPHMOL_EXPORT bool KekulizeIfPossible(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
RDKIT_GRAPHMOL_EXPORT std::vector< std::unique_ptr< MolSanitizeException > > detectChemistryProblems(const ROMol &mol, unsigned int sanitizeOps=SANITIZE_ALL)
Identifies chemistry problems (things that don't make chemical sense) in a molecule.
RDKIT_GRAPHMOL_EXPORT double * getAdjacencyMatrix(const ROMol &mol, bool useBO=false, int emptyVal=0, bool force=false, const char *propNamePrefix=nullptr, const boost::dynamic_bitset<> *bondsToUse=nullptr)
returns a molecule's adjacency matrix
RDKIT_GRAPHMOL_EXPORT ROMol * mergeQueryHs(const ROMol &mol, bool mergeUnmappedOnly=false)
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Use bond directions to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
RDKIT_GRAPHMOL_EXPORT int setAromaticity(RWMol &mol, AromaticityModel model=AROMATICITY_DEFAULT, int(*func)(RWMol &)=nullptr)
Sets up the aromaticity for a molecule.
RDKIT_GRAPHMOL_EXPORT void findRingFamilies(const ROMol &mol)
RDKIT_GRAPHMOL_EXPORT bool needsHs(const ROMol &mol)
returns whether or not a molecule needs to have Hs added to it.
RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol)
use a DFS algorithm to identify ring bonds and atoms in a molecule
RDKIT_GRAPHMOL_EXPORT double * getDistanceMat(const ROMol &mol, bool useBO=false, bool useAtomWts=false, bool force=false, const char *propNamePrefix=nullptr)
Computes the molecule's topological distance matrix.
RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol)
sums up all atomic formal charges and returns the result
AromaticityModel
Possible aromaticity models.
Definition: MolOps.h:516
@ AROMATICITY_RDKIT
Definition: MolOps.h:518
@ AROMATICITY_MDL
Definition: MolOps.h:520
@ AROMATICITY_CUSTOM
use a function
Definition: MolOps.h:521
@ AROMATICITY_DEFAULT
future proofing
Definition: MolOps.h:517
@ AROMATICITY_SIMPLE
Definition: MolOps.h:519
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT std::map< T, boost::shared_ptr< ROMol > > getMolFragsWithQuery(const ROMol &mol, T(*query)(const ROMol &, const Atom *), bool sanitizeFrags=true, const std::vector< T > *whiteList=nullptr, bool negateList=false)
splits a molecule into pieces based on labels assigned using a query
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT ROMol * addHs(const ROMol &mol, bool explicitOnly=false, bool addCoords=false, const UINT_VECT *onlyOnAtoms=nullptr, bool addResidueInfo=false)
returns a copy of a molecule with hydrogens added in as explicit Atoms
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromMolParity(ROMol &mol, bool replaceExistingTags=true)
Uses the molParity atom property to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, std::vector< int > &mapping)
find fragments (disconnected components of the molecular graph)
RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol)
adjust the number of implicit and explicit Hs for special cases
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralTypes to a molecule's atoms and stereo flags to its bonds.
@ SANITIZE_ALL
Definition: MolOps.h:436
@ SANITIZE_SETAROMATICITY
Definition: MolOps.h:431
@ SANITIZE_NONE
Definition: MolOps.h:425
@ SANITIZE_PROPERTIES
Definition: MolOps.h:427
@ SANITIZE_SETCONJUGATION
Definition: MolOps.h:432
@ SANITIZE_SYMMRINGS
Definition: MolOps.h:428
@ SANITIZE_ADJUSTHS
Definition: MolOps.h:435
@ SANITIZE_CLEANUPCHIRALITY
Definition: MolOps.h:434
@ SANITIZE_FINDRADICALS
Definition: MolOps.h:430
@ SANITIZE_KEKULIZE
Definition: MolOps.h:429
@ SANITIZE_SETHYBRIDIZATION
Definition: MolOps.h:433
@ SANITIZE_CLEANUP
Definition: MolOps.h:426
RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at)
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol, int confId=-1)
RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, unsigned int &operationThatFailed, unsigned int sanitizeOps=SANITIZE_ALL)
carries out a collection of tasks for cleaning up a molecule and ensuring that it makes "chemical sen...
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_GRAPHMOL_EXPORT void parseAdjustQueryParametersFromJSON(MolOps::AdjustQueryParameters &p, const std::string &json)
updates an AdjustQueryParameters object from a JSON string
RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize=true)
removes all Hs from a molecule
RDKIT_GRAPHMOL_EXPORT ROMol * adjustQueryProperties(const ROMol &mol, const AdjustQueryParameters *params=nullptr)
returns a copy of a molecule with query properties adjusted
RDKIT_GRAPHMOL_EXPORT void setBondStereoFromDirections(ROMol &mol)
Assign CIS/TRANS bond stereochemistry tags based on neighboring directions.
RDKIT_GRAPHMOL_EXPORT ROMol * renumberAtoms(const ROMol &mol, const std::vector< unsigned int > &newOrder)
returns a copy of a molecule with the atoms renumbered
RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, std::vector< std::vector< int >> &res)
finds a molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at)
returns whether or not the given Atom is involved in a conjugated bond
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol)
removes bogus chirality markers (those on non-sp3 centers):
RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds=true, unsigned int maxBackTracks=100)
Kekulizes the molecule.
RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol)
Called by the sanitizer to assign radical counts to atoms.
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol, bool cleanIt=false)
finds bonds that could be cis/trans in a molecule and mark them as Bond::STEREOANY.
RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol)
calculates and sets the hybridization of all a molecule's Atoms
RDKIT_GRAPHMOL_EXPORT std::list< int > getShortestPath(const ROMol &mol, int aid1, int aid2)
Find the shortest path between two atoms.
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(const ROMol &mol, std::string prop)
returns the number of atoms which have a particular property set
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(ROMol &mol, int confId=-1, bool replaceExistingTags=true)
Uses a conformer to assign ChiralType to a molecule's atoms.
RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, std::vector< std::vector< int >> &res)
symmetrize the molecule's Smallest Set of Smallest Rings
RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol)
flags the molecule's conjugated bonds
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(ROMol &mol, const Conformer *conf=nullptr)
Sets bond directions based on double bond stereochemistry.
AdjustQueryWhichFlags
Definition: MolOps.h:304
@ ADJUST_IGNORERINGS
Definition: MolOps.h:307
@ ADJUST_IGNORENONE
Definition: MolOps.h:305
@ ADJUST_IGNOREMAPPED
Definition: MolOps.h:310
@ ADJUST_IGNORENONDUMMIES
Definition: MolOps.h:309
@ ADJUST_IGNOREDUMMIES
Definition: MolOps.h:308
@ ADJUST_IGNORECHAINS
Definition: MolOps.h:306
@ ADJUST_IGNOREALL
Definition: MolOps.h:311
Std stuff.
Definition: Abbreviations.h:18
std::vector< double > INVAR_VECT
Definition: MolOps.h:31
INVAR_VECT::iterator INVAR_VECT_I
Definition: MolOps.h:33
INVAR_VECT::const_iterator INVAR_VECT_CI
Definition: MolOps.h:34
std::vector< UINT > UINT_VECT
Definition: types.h:296
Parameters controlling the behavior of MolOps::adjustQueryProperties.
Definition: MolOps.h:323
static AdjustQueryParameters noAdjustments()
returns an AdjustQueryParameters object with all adjustments disabled
Definition: MolOps.h:373