RDKit
Open-source cheminformatics and machine learning.
GenericGroups.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2021 Greg Landrum
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef RD_GENERICGROUPS_H
12 #define RD_GENERICGROUPS_H
13 
14 #include <vector>
15 #include <functional>
16 #include <map>
17 #include <boost/dynamic_bitset.hpp>
18 
19 namespace RDKit {
20 class ROMol;
21 class Atom;
22 class Bond;
23 
24 namespace GenericGroups {
25 // We'd like to be able to correctly interpret what's written by Marvin and
26 // MarvinJS, so the conditions for these are adapted from the ChemAxon
27 // documentation for homology groups
28 // (https://docs.chemaxon.com/display/docs/homology-groups.md)
29 //
30 // If I had questions about what the queries should do, I ran examples in Reaxys
31 // with MarvinJS as the sketcher to see what that returns.
32 //
33 // I've tried to document deviations or surprises
34 
35 namespace Matchers {
36 //! Matches alkyl side chains
37 /*!
38 
39  Conditions:
40  - side chain consists entirely of carbon or hydrogen
41  - at least one carbon is present
42  - all bonds are single
43  - no ring bonds
44 
45 */
47  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
48 //! Matches alkenyl side chains
49 /*!
50 
51  Conditions:
52  - side chain consists entirely of carbon or hydrogen
53  - contains at least one C=C
54  - no ring bonds
55 
56 */
58  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
59 //! Matches alkynyl side chains
60 /*!
61 
62  Conditions:
63  - side chain consists entirely of carbon or hydrogen
64  - contains at least one C#C
65  - no ring bonds
66 
67 */
69  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
70 
71 //! Matches carbocyclic side chains
72 /*!
73 
74  Note: this is Reaxys query type CBC and matches carbocycles
75 
76  Conditions:
77  - atom is in at least one ring composed entirely of carbon
78  - atom is not in any rings not compatible with the above conditions
79  - additional fused rings in the system must obey the same rules
80 
81 
82 */
84  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
85 //! Matches cycloalkyl side chains
86 /*!
87 
88  Note: this is Reaxys query type CAL and is directly equivalent to alkyl,
89  except the immediate atom needs to be in a ring.
90 
91 
92  Conditions:
93  - atom is in at least one ring composed entirely of carbon and connected
94  with single bonds
95  - atoms in the ring do not have unsaturations (including exocyclic)
96  - atom is not in any rings not compatible with the above conditions
97  - additional fused rings in the system must obey the same rules (i.e. all
98  single bonds)
99 
100 
101 */
103  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
104 //! Matches cycloalkenyl side chains
105 /*!
106 
107  Note: this is Reaxys query type CEL and matches carbocycles which have at
108  least one double or aromatic bond.
109 
110  Conditions:
111  - atom is in at least one ring composed entirely of carbon and with at least
112  one double or aromatic bond
113  - atom is not in any rings not compatible with the above conditions
114  - additional fused rings in the system must obey the same rules (including
115  that each ring must have at least one double or aromatic bond)
116 
117 
118 */
120  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
121 
122 //! Matches heterocyclic side chains
123 /*!
124 
125  Note: this is Reaxys query type CHC and matches heterocycles
126 
127  Conditions:
128  - atom is in at least one fused ring with a heteroatom
129 
130 
131 */
133  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
134 
135 //! Matches aryl side chains
136 /*!
137 
138  Note: this is Reaxys query type ARY and matches carbocycles which are aromatic
139 
140  Conditions:
141  - atom is in at least one aromatic ring composed entirely of carbon
142  - atom is not in any rings not compatible with the above conditions
143  - additional fused rings in the system must obey the same rules
144 
145 
146 */
148  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
149 
150 //! Matches heteroaryl side chains
151 /*!
152 
153  Note: this is Reaxys query type HAR and matches aromatic heterocycles
154 
155  Conditions:
156  - atom is in at least one fused aromatic system with a heteroatom
157 
158 
159 */
161  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
162 
163 //! Matches cyclic side chains
164 /*!
165 
166  Note: this is Reaxys query type CYC and matches cycles
167 
168  Conditions:
169  - atom is in at least one ring
170 
171 */
173  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
174 
175 //! Matches acyclic side chains
176 /*!
177 
178  Note: this is Reaxys query type ACY and matches sidechains with no cycles
179 
180  Conditions:
181  - no atom in the sidechain is in a ring
182 
183 */
185  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
186 
187 //! Matches all-carbon acyclic side chains
188 /*!
189 
190  Note: this is Reaxys query type ABC and matches all-carbon sidechains with no
191  cycles
192 
193  Conditions:
194  - all atoms in the sidechain are carbon
195  - no atom in the sidechain is in a ring
196 
197 */
199  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
200 
201 //! Matches acyclic side chains with at least one heteroatom
202 /*!
203 
204  Note: this is Reaxys query type AHC and matches sidechains with no cycles and
205  at least one heteroatom
206 
207  Conditions:
208  - at least one non-carbon, non-hydrogen atom is in the sidechain
209  - no atom in the sidechain is in a ring
210 
211 */
213  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
214 //! Matches acyclic alkoxy side chains
215 /*!
216 
217  Note: this is Reaxys query type AOX and matches alkoxy sidechains
218 
219  Conditions:
220  - first atom is an O
221  - all other atoms are C
222  - all single bonds
223  - no atom in the sidechain is in a ring
224 
225 */
227  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
228 
229 //! Matches rings without carbon
230 /*!
231 
232  Note: this is Reaxys query type CXX and matches rings which contain no carbon
233 
234  Conditions:
235  - a ring is present
236  - none of the atoms in the fused ring system are carbon
237 
238 */
240  const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore);
241 } // namespace Matchers
242 const static std::map<
243  std::string,
244  std::function<bool(const ROMol &, const Atom &, boost::dynamic_bitset<>)>>
246  {"Alkyl", Matchers::AlkylAtomMatcher},
248  {"Alkenyl", Matchers::AlkenylAtomMatcher},
250  {"Alkynyl", Matchers::AlkynylAtomMatcher},
252  {"Carbocyclic", Matchers::CarbocyclicAtomMatcher},
254  {"Carbocycloalkyl", Matchers::CarbocycloalkylAtomMatcher},
256  {"Carbocycloalkenyl", Matchers::CarbocycloalkenylAtomMatcher},
258  {"Carboaryl", Matchers::CarboarylAtomMatcher},
260  {"Cyclic", Matchers::CyclicAtomMatcher},
262  {"Acyclic", Matchers::AcyclicAtomMatcher},
264  {"Carboacyclic", Matchers::CarboacyclicAtomMatcher},
266  {"Heteroacyclic", Matchers::HeteroacyclicAtomMatcher},
270  {"Heterocyclic", Matchers::HeterocyclicAtomMatcher},
272  {"Heteroaryl", Matchers::HeteroarylAtomMatcher},
274  {"NoCarbonRing", Matchers::NoCarbonRingAtomMatcher},
276 };
277 //! returns false if any of the molecule's generic atoms are not satisfied in
278 /// the current match
280  const ROMol &mol, const ROMol &query,
281  const std::vector<unsigned int> &match);
282 //! sets the appropriate generic query tags based on atom labels and/or SGroups
283 /*
284 
285 - Generic query tags found in the atom labels/SGroups will overwrite existing
286 generic query tags (if there are any present).
287 - only SUP SGroups are considered
288 - Any atom labels or SGroups which are converted will be removed
289 - If both atom labels and SGroups are being used and an atom has generic
290 query tags in both, the one from the SGroup will be used.
291 - Generic query tags not found in GenericGroups::genericMatchers will be ignored
292 
293 */
295  ROMol &mol, bool useAtomLabels = true, bool useSGroups = true);
297  ROMol &mol);
298 } // namespace GenericGroups
299 } // namespace RDKit
300 
301 #endif
The class for representing atoms.
Definition: Atom.h:68
#define RDKIT_GENERICGROUPS_EXPORT
Definition: export.h:209
RDKIT_GENERICGROUPS_EXPORT bool CyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches cyclic side chains.
RDKIT_GENERICGROUPS_EXPORT bool AlkylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches alkyl side chains.
RDKIT_GENERICGROUPS_EXPORT bool CarbocycloalkylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches cycloalkyl side chains.
RDKIT_GENERICGROUPS_EXPORT bool HeteroacyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches acyclic side chains with at least one heteroatom.
RDKIT_GENERICGROUPS_EXPORT bool HeterocyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches heterocyclic side chains.
RDKIT_GENERICGROUPS_EXPORT bool CarbocyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches carbocyclic side chains.
RDKIT_GENERICGROUPS_EXPORT bool AcyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches acyclic side chains.
RDKIT_GENERICGROUPS_EXPORT bool AlkynylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches alkynyl side chains.
RDKIT_GENERICGROUPS_EXPORT bool CarbocycloalkenylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches cycloalkenyl side chains.
RDKIT_GENERICGROUPS_EXPORT bool NoCarbonRingAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches rings without carbon.
RDKIT_GENERICGROUPS_EXPORT bool CarboacyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches all-carbon acyclic side chains.
RDKIT_GENERICGROUPS_EXPORT bool HeteroarylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches heteroaryl side chains.
RDKIT_GENERICGROUPS_EXPORT bool AlkoxyacyclicAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches acyclic alkoxy side chains.
RDKIT_GENERICGROUPS_EXPORT bool AlkenylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches alkenyl side chains.
RDKIT_GENERICGROUPS_EXPORT bool CarboarylAtomMatcher(const ROMol &mol, const Atom &atom, boost::dynamic_bitset<> ignore)
Matches aryl side chains.
RDKIT_GENERICGROUPS_EXPORT bool genericAtomMatcher(const ROMol &mol, const ROMol &query, const std::vector< unsigned int > &match)
RDKIT_GENERICGROUPS_EXPORT void setGenericQueriesFromProperties(ROMol &mol, bool useAtomLabels=true, bool useSGroups=true)
sets the appropriate generic query tags based on atom labels and/or SGroups
static const std::map< std::string, std::function< bool(const ROMol &, const Atom &, boost::dynamic_bitset<>)> > genericMatchers
RDKIT_GENERICGROUPS_EXPORT void convertGenericQueriesToSubstanceGroups(ROMol &mol)
Std stuff.
Definition: Abbreviations.h:18