RDKit
Open-source cheminformatics and machine learning.
MorganFingerprints.h
Go to the documentation of this file.
1 //
2 //
3 // Copyright (c) 2009-2010, Novartis Institutes for BioMedical Research Inc.
4 // All rights reserved.
5 //
6 // Redistribution and use in source and binary forms, with or without
7 // modification, are permitted provided that the following conditions are
8 // met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above
13 // copyright notice, this list of conditions and the following
14 // disclaimer in the documentation and/or other materials provided
15 // with the distribution.
16 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
17 // nor the names of its contributors may be used to endorse or promote
18 // products derived from this software without specific prior written
19 // permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 //
33 // Created by Greg Landrum, July 2008
34 //
35 //
36 
37 /*! \file MorganFingerprints.h
38  \brief Functions for generating Morgan (ECFP/FCFP-like) fingerprints.
39 */
40 #include <RDGeneral/export.h>
41 #ifndef __RD_MORGANFPS_H__
42 #define __RD_MORGANFPS_H__
43 
44 #include <vector>
45 #include <map>
48 #include <cstdint>
50 
51 namespace RDKit {
52 class ROMol;
53 namespace MorganFingerprints {
54 typedef std::map<std::uint32_t,
55  std::vector<std::pair<std::uint32_t, std::uint32_t>>>
57 
58 const std::string morganFingerprintVersion = "1.0.0";
59 
60 //! returns the Morgan fingerprint for a molecule
61 /*!
62  These fingerprints are similar to the well-known ECFP or
63  FCFP fingerprints, depending on which invariants are used.
64 
65  The algorithm used is described in the paper
66  Rogers, D. & Hahn, M. Extended-Connectivity Fingerprints. JCIM 50:742-54
67  (2010)
68  https://doi.org/10.1021/ci100050t
69 
70  The original implementation was done using this paper:
71  D. Rogers, R.D. Brown, M. Hahn J. Biomol. Screen. 10:682-6 (2005)
72  and an unpublished technical report:
73  http://www.ics.uci.edu/~welling/teaching/ICS274Bspring06/David%20Rogers%20-%20ECFP%20Manuscript.doc
74 
75  \param mol: the molecule to be fingerprinted
76  \param radius: the number of iterations to grow the fingerprint
77  \param invariants : optional pointer to a set of atom invariants to
78  be used. By default ECFP-type invariants are used
79  (calculated by getConnectivityInvariants())
80  \param fromAtoms : if this is provided, only the atoms in the vector will be
81  used as centers in the fingerprint
82  \param useChirality : if set, additional information will be added to the
83  fingerprint
84  when chiral atoms are discovered. This will cause
85  \verbatim C[C@H](F)Cl,
86  C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate
87  different fingerprints.
88  \param useBondTypes : if set, bond types will be included as part of the hash
89  for
90  calculating bits
91  \param useCounts : if set, counts of the features will be used
92  \param onlyNonzeroInvariants : if set, bits will only be set from atoms that
93  have a nonzero invariant.
94  \param atomsSettingBits : if non-null, this will be used to return information
95  about the atoms that set each particular bit.
96  The keys of the map are bit ids, the values
97  are lists of (atomId, radius) pairs.
98  \param includeRedundantEnvironments : if set, the check for redundant atom
99  environments will not be done.
100 
101  \return a pointer to the fingerprint. The client is
102  responsible for calling delete on this.
103 
104 */
106  const ROMol &mol, unsigned int radius,
107  std::vector<boost::uint32_t> *invariants = nullptr,
108  const std::vector<boost::uint32_t> *fromAtoms = nullptr,
109  bool useChirality = false, bool useBondTypes = true, bool useCounts = true,
110  bool onlyNonzeroInvariants = false, BitInfoMap *atomsSettingBits = nullptr,
111  bool includeRedundantEnvironments = false);
112 
113 //! returns the hashed Morgan fingerprint for a molecule
114 /*!
115  These fingerprints are similar to the well-known ECFP or
116  FCFP fingerprints, depending on which invariants are used.
117 
118  The algorithm used is described in the paper
119  Rogers, D. & Hahn, M. Extended-Connectivity Fingerprints. JCIM 50:742-54
120  (2010)
121  https://doi.org/10.1021/ci100050t
122 
123  The original implementation was done using this paper:
124  D. Rogers, R.D. Brown, M. Hahn J. Biomol. Screen. 10:682-6 (2005)
125  and an unpublished technical report:
126  http://www.ics.uci.edu/~welling/teaching/ICS274Bspring06/David%20Rogers%20-%20ECFP%20Manuscript.doc
127 
128  \param mol: the molecule to be fingerprinted
129  \param radius: the number of iterations to grow the fingerprint
     \param nBits: the number of bits in the final (hashed) fingerprint
130  \param invariants : optional pointer to a set of atom invariants to
131  be used. By default ECFP-type invariants are used
132  (calculated by getConnectivityInvariants())
133  \param fromAtoms : if this is provided, only the atoms in the vector will be
134  used as centers in the fingerprint
135  \param useChirality : if set, additional information will be added to the
136  fingerprint
137  when chiral atoms are discovered. This will cause
138  \verbatim C[C@H](F)Cl,
139  C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate
140  different fingerprints.
141  \param useBondTypes : if set, bond types will be included as part of the hash
142  for
143  calculating bits
144  \param onlyNonzeroInvariants : if set, bits will only be set from atoms that
145  have a nonzero invariant.
146  \param atomsSettingBits : if non-null, this will be used to return information
147  about the atoms that set each particular bit.
148  The keys of the map are bit ids, the values
149  are lists of (atomId, radius) pairs.
150  \param includeRedundantEnvironments : if set, the check for redundant atom
151  environments will not be done.
152 
153  \return a pointer to the fingerprint. The client is
154  responsible for calling delete on this.
155 
156 */
158  const ROMol &mol, unsigned int radius, unsigned int nBits = 2048,
159  std::vector<boost::uint32_t> *invariants = nullptr,
160  const std::vector<boost::uint32_t> *fromAtoms = nullptr,
161  bool useChirality = false, bool useBondTypes = true,
162  bool onlyNonzeroInvariants = false, BitInfoMap *atomsSettingBits = nullptr,
163  bool includeRedundantEnvironments = false);
164 
165 //! returns the Morgan fingerprint for a molecule as a bit vector
166 /*!
167  see documentation for getFingerprint() for theory/references
168 
169  \param mol: the molecule to be fingerprinted
170  \param radius: the number of iterations to grow the fingerprint
171  \param nBits: the number of bits in the final fingerprint
172  \param invariants : optional pointer to a set of atom invariants to
173  be used. By default ECFP-type invariants are used
174  (calculated by getConnectivityInvariants())
175  \param fromAtoms : if this is provided, only the atoms in the vector will be
176  used as centers in the fingerprint
177  \param useChirality : if set, additional information will be added to the
178  fingerprint
179  when chiral atoms are discovered. This will cause
180  \verbatim C[C@H](F)Cl,
181  C[C@@H](F)Cl, and CC(F)Cl \endverbatim to generate
182  different fingerprints.
183  \param useBondTypes : if set, bond types will be included as part of the hash
184  for
185  calculating bits
186  \param onlyNonzeroInvariants : if set, bits will only be set from atoms that
187  have a nonzero invariant.
188  \param atomsSettingBits : if non-null, this will be used to return information
189  about the atoms that set each particular bit.
190  The keys of the map are bit ids, the values
191  are lists of (atomId, radius) pairs.
192  \param includeRedundantEnvironments : if set, the check for redundant atom
193  environments will not be done.
194 
195  \return a pointer to the fingerprint. The client is
196  responsible for calling delete on this.
197 
198 */
200  const ROMol &mol, unsigned int radius, unsigned int nBits,
201  std::vector<std::uint32_t> *invariants = nullptr,
202  const std::vector<std::uint32_t> *fromAtoms = nullptr,
203  bool useChirality = false, bool useBondTypes = true,
204  bool onlyNonzeroInvariants = false, BitInfoMap *atomsSettingBits = nullptr,
205  bool includeRedundantEnvironments = false);
206 
207 } // end of namespace MorganFingerprints
208 } // namespace RDKit
209 
210 #endif
a class for bit vectors that are densely occupied
a class for efficiently storing sparse vectors of ints
Definition: SparseIntVect.h:28
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:169
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::uint32_t > * getFingerprint(const ROMol &mol, unsigned int radius, std::vector< boost::uint32_t > *invariants=nullptr, const std::vector< boost::uint32_t > *fromAtoms=nullptr, bool useChirality=false, bool useBondTypes=true, bool useCounts=true, bool onlyNonzeroInvariants=false, BitInfoMap *atomsSettingBits=nullptr, bool includeRedundantEnvironments=false)
returns the Morgan fingerprint for a molecule
std::map< std::uint32_t, std::vector< std::pair< std::uint32_t, std::uint32_t > > > BitInfoMap
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getFingerprintAsBitVect(const ROMol &mol, unsigned int radius, unsigned int nBits, std::vector< std::uint32_t > *invariants=nullptr, const std::vector< std::uint32_t > *fromAtoms=nullptr, bool useChirality=false, bool useBondTypes=true, bool onlyNonzeroInvariants=false, BitInfoMap *atomsSettingBits=nullptr, bool includeRedundantEnvironments=false)
returns the Morgan fingerprint for a molecule as a bit vector
const std::string morganFingerprintVersion
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::uint32_t > * getHashedFingerprint(const ROMol &mol, unsigned int radius, unsigned int nBits=2048, std::vector< boost::uint32_t > *invariants=nullptr, const std::vector< boost::uint32_t > *fromAtoms=nullptr, bool useChirality=false, bool useBondTypes=true, bool onlyNonzeroInvariants=false, BitInfoMap *atomsSettingBits=nullptr, bool includeRedundantEnvironments=false)
returns the Morgan fingerprint for a molecule
Std stuff.
Definition: Abbreviations.h:18