RDKit
Open-source cheminformatics and machine learning.
SubstanceGroup.h
Go to the documentation of this file.
1 //
2 //
3 // Copyright (C) 2018-2020 Greg Landrum and T5 Informatics GmbH
4 //
5 // @@ All Rights Reserved @@
6 // This file is part of the RDKit.
7 // The contents are covered by the terms of the BSD license
8 // which is included in the file license.txt, found at the root
9 // of the RDKit source tree.
10 //
11 /*! \file SubstanceGroup.h
12 
13  \brief Defines the SubstanceGroup class
14 
15 */
16 #include <RDGeneral/export.h>
17 #ifndef _RD_SGROUP_H
18 #define _RD_SGROUP_H
19 
20 #include <utility>
21 #include <unordered_map>
22 
23 #include <Geometry/point.h>
24 #include <RDGeneral/types.h>
25 #include <RDGeneral/RDProps.h>
26 #include <boost/smart_ptr.hpp>
27 
28 namespace RDKit {
29 class ROMol;
30 class RWMol;
31 class Bond;
32 class Atom;
33 
34 //! used to indicate errors from incorrect sgroup access
36  : public std::runtime_error {
37  public:
38  //! construct with an error message
39  SubstanceGroupException(const char *msg) : std::runtime_error(msg) {}
40  //! construct with an error message
41  SubstanceGroupException(const std::string &msg) : std::runtime_error(msg) {}
42 };
43 
44 //! The class for representing SubstanceGroups
45 /*!
46  <b>Notes:</b>
47  - These are inspired by the SGroups in the MDL formats
48  - Implementation is based on 2010 MDL SD specification:
49  http://infochim.u-strasbg.fr/recherche/Download/Fragmentor/MDL_SDF.pdf
50  - See SGroups.md for further, more comprehensive notes.
51 
52 */
53 
55  public:
//! Bond type (see V3000 spec)
enum class BondType {
  //! External/Crossing bond
  XBOND,
  //! Internal/Contained bond
  CBOND,
};
61 
62  typedef std::array<RDGeom::Point3D, 3> Bracket;
63 
//! Data structure for SAP lines (see V3000 spec)
//! lvIdx may not be set; this is signaled with the value -1
struct AttachPoint {
  unsigned int aIdx;
  int lvIdx;
  std::string id;
  //! two attachment points compare equal only when all three fields match
  bool operator==(const AttachPoint &rhs) const {
    if (aIdx != rhs.aIdx || lvIdx != rhs.lvIdx) {
      return false;
    }
    return id == rhs.id;
  }
};
74 
75  //! See specification for V3000 CSTATE
76  //! vector may or may not be considered, depending on TYPE
77  struct CState {
78  unsigned int bondIdx;
80  bool operator==(const CState &other) const {
81  // note that we ignore coordinates for this
82  return bondIdx == other.bondIdx;
83  }
84  };
85 
86 //! No default constructor
87 #ifndef SWIG
88  // Unfortunately, SWIG generated wrapper code uses temporary variables that
89  // require a default ctor not be deleted.
90  SubstanceGroup() = delete;
91 #endif // !SWIG
92 
93  //! Main Constructor. Ownership is only set on this side of the relationship:
94  //! mol->addSubstanceGroup(sgroup) still needs to be called to get ownership
95  //! on the other side.
96  SubstanceGroup(ROMol *owning_mol, const std::string &type);
97 
98  SubstanceGroup(const SubstanceGroup &other) = default;
99  SubstanceGroup &operator=(const SubstanceGroup &other) = default;
100 
101  SubstanceGroup(SubstanceGroup &&other) noexcept : RDProps(std::move(other)) {
102  dp_mol = std::exchange(other.dp_mol, nullptr);
103  d_atoms = std::move(other.d_atoms);
104  d_patoms = std::move(other.d_patoms);
105  d_bonds = std::move(other.d_bonds);
106  d_brackets = std::move(other.d_brackets);
107  d_cstates = std::move(other.d_cstates);
108  d_saps = std::move(other.d_saps);
109  }
110 
112  if (this == &other) {
113  return *this;
114  }
115  RDProps::operator=(std::move(other));
116  dp_mol = std::exchange(other.dp_mol, nullptr);
117  d_atoms = std::move(other.d_atoms);
118  d_patoms = std::move(other.d_patoms);
119  d_bonds = std::move(other.d_bonds);
120  d_brackets = std::move(other.d_brackets);
121  d_cstates = std::move(other.d_cstates);
122  d_saps = std::move(other.d_saps);
123  return *this;
124  }
125 
126  //! Destructor
127  ~SubstanceGroup() = default;
128 
129  //! returns whether or not this belongs to a molecule
130  bool hasOwningMol() const { return dp_mol != nullptr; }
131 
132  //! Get the molecule that owns this instance
133  ROMol &getOwningMol() const {
134  PRECONDITION(dp_mol, "no owner");
135  return *dp_mol;
136  }
137 
138  //! returns whether or not this group is valid; invalid groups must be
139  //! ignored.
140  bool getIsValid() const { return d_isValid; }
141 
142  //! set whether or not this group is valid; invalid groups must be ignored.
143  void setIsValid(bool isValid) { d_isValid = isValid; }
144 
145  //! get the index of this sgroup in dp_mol's sgroups vector
146  //! (do not mistake this for the ID!)
147  unsigned int getIndexInMol() const;
148 
149  /* Atom and Bond methods */
150  void addAtomWithIdx(unsigned int idx);
151  void addParentAtomWithIdx(unsigned int idx);
152  void addBondWithIdx(unsigned int idx);
153  void addAtomWithBookmark(int mark);
155  void addBondWithBookmark(int mark);
156 
157  void addBracket(const Bracket &bracket);
158  void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector);
159  void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr);
160 
161  BondType getBondType(unsigned int bondIdx) const;
162 
163  const std::vector<unsigned int> &getAtoms() const { return d_atoms; }
164  const std::vector<unsigned int> &getParentAtoms() const { return d_patoms; }
165  const std::vector<unsigned int> &getBonds() const { return d_bonds; }
166 
167  void setAtoms(std::vector<unsigned int> atoms) { d_atoms = std::move(atoms); }
168  void setParentAtoms(std::vector<unsigned int> patoms) {
169  d_patoms = std::move(patoms);
170  }
171  void setBonds(std::vector<unsigned int> bonds) { d_bonds = std::move(bonds); }
172 
173  const std::vector<Bracket> &getBrackets() const { return d_brackets; }
174  const std::vector<CState> &getCStates() const { return d_cstates; }
175  const std::vector<AttachPoint> &getAttachPoints() const { return d_saps; }
176 
177  std::vector<Bracket> &getBrackets() { return d_brackets; }
178  std::vector<CState> &getCStates() { return d_cstates; }
179  std::vector<AttachPoint> &getAttachPoints() { return d_saps; }
180 
181  void clearBrackets() { d_brackets.clear(); }
182  void clearCStates() { d_cstates.clear(); }
183  void clearAttachPoints() { d_saps.clear(); }
184 
185  //! adjusts our atom IDs to reflect that an atom has been removed from the
186  //! parent molecule
187  //! decrements all atom IDs that are higher than \c atomIdx
188  //! raises a \c SubstanceGroupException if \c atomIdx is actually part of
189  //! this substance group
190  //! \returns whether or not anything was changed
191  bool adjustToRemovedAtom(unsigned int atomIdx);
192 
193  //! \returns whether or not the specified atom is part of the
194  //! definition of this substance group
195  bool includesAtom(unsigned int atomIdx) const;
196 
197  //! adjusts our bond IDs to reflect that a bond has been removed from the
198  //! parent molecule
199  //! decrements all bond IDs that are higher than \c bondIdx
200  //! raises a \c SubstanceGroupException if \c bondIdx is actually part of
201  //! this substance group
202  //! \returns whether or not anything was changed
203  bool adjustToRemovedBond(unsigned int bondIdx);
204 
205  //! \returns whether or not the specified bond is part of the
206  //! definition of this substance group
207  bool includesBond(unsigned int bondIdx) const;
208 
209  //! Set owning molecule
210  //! This only updates atoms and bonds; parent sgroup has to be updated
211  //! independently, since parent might not exist at the time this is
212  //! called.
213  void setOwningMol(ROMol *mol);
214 
215  bool operator==(const SubstanceGroup &other) const {
216  // we ignore brackets and cstates, which involve coordinates
217  return dp_mol == other.dp_mol && d_atoms == other.d_atoms &&
218  d_patoms == other.d_patoms && d_bonds == other.d_bonds &&
219  d_saps == other.d_saps;
220  }
221 
222  private:
223  ROMol *dp_mol = nullptr; // owning molecule
224 
225  bool d_isValid = true;
226 
227  std::vector<unsigned int> d_atoms;
228  std::vector<unsigned int> d_patoms;
229  std::vector<unsigned int> d_bonds;
230 
231  std::vector<Bracket> d_brackets;
232  std::vector<CState> d_cstates;
233  std::vector<AttachPoint> d_saps;
234 }; // class SubstanceGroup
235 
236 namespace SubstanceGroupChecks {
237 
238 const std::vector<std::string> sGroupTypes = {
239  // polymer sgroups:
240  "SRU", "MON", "COP", "CRO", "GRA", "MOD", "MER", "ANY",
241  // formulations/mixtures:
242  "COM", "MIX", "FOR",
243  // other
244  "SUP", "MUL", "DAT", "GEN"};
245 
246 const std::vector<std::string> sGroupSubtypes = {"ALT", "RAN", "BLO"};
247 const std::vector<std::string> sGroupConnectTypes = {"HH", "HT", "EU"};
248 
249 RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type);
250 
251 RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type);
252 
253 RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type);
254 
256  unsigned int id);
257 
258 } // namespace SubstanceGroupChecks
259 
260 //! \name SubstanceGroups and molecules
261 //@{
262 
263 RDKIT_GRAPHMOL_EXPORT std::vector<SubstanceGroup> &getSubstanceGroups(
264  ROMol &mol);
265 RDKIT_GRAPHMOL_EXPORT const std::vector<SubstanceGroup> &getSubstanceGroups(
266  const ROMol &mol);
267 
268 //! Add a new SubstanceGroup. A copy is added, so we can be sure that no other
269 //! references to the SubstanceGroup exist.
270 /*!
271  \param sgroup - SubstanceGroup to be added to the molecule.
272 */
274  SubstanceGroup sgroup);
275 
276 //! Removes SubstanceGroups which reference a particular atom index
277 /*!
278  \param mol - molecule to be edited.
279  \param idx - atom index
280 */
282  RWMol &mol, unsigned int idx);
283 //! Removes SubstanceGroups which reference a particular bond index
284 /*!
285  \param mol - molecule to be edited.
286  \param idx - bond index
287 */
289  RWMol &mol, unsigned int idx);
290 //@}
291 
292 } // namespace RDKit
293 
294 //! allows SubstanceGroup objects to be dumped to streams
295 RDKIT_GRAPHMOL_EXPORT std::ostream &operator<<(std::ostream &target,
296  const RDKit::SubstanceGroup &sg);
297 #endif
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
RDKIT_GRAPHMOL_EXPORT std::ostream & operator<<(std::ostream &target, const RDKit::SubstanceGroup &sg)
allows SubstanceGroup objects to be dumped to streams
RDProps & operator=(const RDProps &rhs)
Definition: RDProps.h:24
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
used to indicate errors from incorrect sgroup access
SubstanceGroupException(const std::string &msg)
construct with an error message
SubstanceGroupException(const char *msg)
construct with an error message
The class for representing SubstanceGroups.
const std::vector< Bracket > & getBrackets() const
void addBondWithIdx(unsigned int idx)
void setOwningMol(ROMol *mol)
void setParentAtoms(std::vector< unsigned int > patoms)
std::vector< AttachPoint > & getAttachPoints()
void addAttachPoint(unsigned int aIdx, int lvIdx, const std::string &idStr)
void setBonds(std::vector< unsigned int > bonds)
~SubstanceGroup()=default
Destructor.
void setIsValid(bool isValid)
set whether or not this group is valid; invalid groups must be ignored.
const std::vector< unsigned int > & getAtoms() const
std::vector< Bracket > & getBrackets()
void addParentAtomWithBookmark(int mark)
const std::vector< unsigned int > & getParentAtoms() const
void setAtoms(std::vector< unsigned int > atoms)
const std::vector< unsigned int > & getBonds() const
bool adjustToRemovedBond(unsigned int bondIdx)
void addCState(unsigned int bondIdx, const RDGeom::Point3D &vector)
const std::vector< CState > & getCStates() const
SubstanceGroup()=delete
No default constructor.
bool adjustToRemovedAtom(unsigned int atomIdx)
bool operator==(const SubstanceGroup &other) const
BondType
Bond type (see V3000 spec)
SubstanceGroup(const SubstanceGroup &other)=default
SubstanceGroup(SubstanceGroup &&other) noexcept
const std::vector< AttachPoint > & getAttachPoints() const
ROMol & getOwningMol() const
Get the molecule that owns this instance.
std::vector< CState > & getCStates()
SubstanceGroup & operator=(const SubstanceGroup &other)=default
void addBondWithBookmark(int mark)
void addAtomWithBookmark(int mark)
bool includesAtom(unsigned int atomIdx) const
SubstanceGroup(ROMol *owning_mol, const std::string &type)
void addParentAtomWithIdx(unsigned int idx)
void addAtomWithIdx(unsigned int idx)
std::array< RDGeom::Point3D, 3 > Bracket
void addBracket(const Bracket &bracket)
bool hasOwningMol() const
returns whether or not this belongs to a molecule
SubstanceGroup & operator=(SubstanceGroup &&other) noexcept
bool includesBond(unsigned int bondIdx) const
BondType getBondType(unsigned int bondIdx) const
unsigned int getIndexInMol() const
#define RDKIT_GRAPHMOL_EXPORT
Definition: export.h:217
RDKIT_GRAPHMOL_EXPORT bool isValidType(const std::string &type)
RDKIT_GRAPHMOL_EXPORT bool isValidSubType(const std::string &type)
const std::vector< std::string > sGroupConnectTypes
RDKIT_GRAPHMOL_EXPORT bool isSubstanceGroupIdFree(const ROMol &mol, unsigned int id)
RDKIT_GRAPHMOL_EXPORT bool isValidConnectType(const std::string &type)
const std::vector< std::string > sGroupSubtypes
const std::vector< std::string > sGroupTypes
Std stuff.
Definition: Abbreviations.h:18
RDKIT_GRAPHMOL_EXPORT unsigned int addSubstanceGroup(ROMol &mol, SubstanceGroup sgroup)
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingBond(RWMol &mol, unsigned int idx)
Removes SubstanceGroups which reference a particular bond index.
RDKIT_GRAPHMOL_EXPORT std::vector< SubstanceGroup > & getSubstanceGroups(ROMol &mol)
RDKIT_GRAPHMOL_EXPORT void removeSubstanceGroupsReferencingAtom(RWMol &mol, unsigned int idx)
Removes SubstanceGroups which reference a particular atom index.
bool operator==(const AttachPoint &other) const
bool operator==(const CState &other) const