RDKit
Open-source cheminformatics and machine learning.
AtomPairs.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2007-2013 Greg Landrum
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 /*! \file AtomPairs.h
12 
13 
14  A few quick notes about fingerprint size and the way chirality is handled in
15  these functions.
16 
17  By default the atom-pair and topological-torsion fingerprints do not include any
18  information about
19  chirality; the atom invariants only include information about the atomic
20  number,
21  number of pi electrons, and degree.
22  When chirality is included, two additional bits are added to the atom
23  invariants to flag R/S/no
24  chirality. These additional bits change the size of the atom invariants and
25  either the size
26  of the final fingerprint (atom pairs) or the maximum allowed path length
27  (torsions). This means
28  that even fingerprints for achiral molecules are different when
29  includeChirality is true.
30 
31 */
32 #include <RDGeneral/export.h>
33 #ifndef __RD_ATOMPAIRS_H__
34 #define __RD_ATOMPAIRS_H__
35 
37 #include <DataStructs/BitVects.h>
38 #include <cstdint>
40 namespace RDKit {
41 class Atom;
42 
43 namespace AtomPairs {
44 const std::string atomPairsVersion = "1.1.0";
45 
46 //! returns the atom-pair fingerprint for a molecule
47 /*!
48  The algorithm used is described here:
49  R.E. Carhart, D.H. Smith, R. Venkataraghavan; "Atom Pairs as
50  Molecular Features in Structure-Activity Studies: Definition
51  and Applications" JCICS 25, 64-73 (1985).
52 
53 
54  \param mol: the molecule to be fingerprinted
55  \param minLength: minimum distance between atoms to be
56  considered in a pair. Default is 1 bond.
57  \param maxLength: maximum distance between atoms to be
58  considered in a pair.
59  Default is maxPathLen-1 bonds.
60  \param fromAtoms: if provided, only atom pairs that involve
61  the specified atoms will be included in the
62  fingerprint
63  \param ignoreAtoms: if provided, any atom pairs that include
64  the specified atoms will not be included in the
65  fingerprint
66  \param atomInvariants: a list of invariants to use for the atom hashes
67  note: only the first \c codeSize bits of each
68  invariant are used.
69  \param includeChirality: if set, chirality will be used in the atom invariants
70  (note: this is ignored if atomInvariants are
71  provided)
72  \param use2D: if set, the 2D (topological) distance matrix is used.
73  \param confId: the conformation to use if 3D distances are being used
74 
75 
76  \return a pointer to the fingerprint. The client is
77  responsible for calling delete on this.
78 
79 */
81  const ROMol &mol, unsigned int minLength, unsigned int maxLength,
82  const std::vector<std::uint32_t> *fromAtoms = 0,
83  const std::vector<std::uint32_t> *ignoreAtoms = 0,
84  const std::vector<std::uint32_t> *atomInvariants = 0,
85  bool includeChirality = false, bool use2D = true, int confId = -1);
86 //! \overload
88  const ROMol &mol, const std::vector<std::uint32_t> *fromAtoms = 0,
89  const std::vector<std::uint32_t> *ignoreAtoms = 0,
90  const std::vector<std::uint32_t> *atomInvariants = 0,
91  bool includeChirality = false, bool use2D = true, int confId = -1);
92 
93 //! returns the hashed atom-pair fingerprint for a molecule
94 /*!
95  \param mol: the molecule to be fingerprinted
96  \param nBits: the length of the fingerprint to generate
97  \param minLength: minimum distance between atoms to be
98  considered in a pair. Default is 1 bond.
99  \param maxLength: maximum distance between atoms to be
100  considered in a pair.
101  Default is maxPathLen-1 bonds.
102  \param fromAtoms: if provided, only atom pairs that involve
103  the specified atoms will be included in the
104  fingerprint
105  \param ignoreAtoms: if provided, any atom pairs that include
106  the specified atoms will not be included in the
107  fingerprint
108  \param atomInvariants: a list of invariants to use for the atom hashes
109  note: only the first \c codeSize bits of each
110  invariant are used.
111  \param includeChirality: if set, chirality will be used in the atom invariants
112  (note: this is ignored if atomInvariants are
113  provided)
114  \param use2D: if set, the 2D (topological) distance matrix is used.
115  \param confId: the conformation to use if 3D distances are being used
116  \return a pointer to the fingerprint. The client is
117  responsible for calling delete on this.
118 
119 */
122  const ROMol &mol, unsigned int nBits = 2048, unsigned int minLength = 1,
123  unsigned int maxLength = maxPathLen - 1,
124  const std::vector<std::uint32_t> *fromAtoms = 0,
125  const std::vector<std::uint32_t> *ignoreAtoms = 0,
126  const std::vector<std::uint32_t> *atomInvariants = 0,
127  bool includeChirality = false, bool use2D = true, int confId = -1);
128 //! returns the hashed atom-pair fingerprint for a molecule as a bit vector
129 /*!
130  \param mol: the molecule to be fingerprinted
131  \param nBits: the length of the fingerprint to generate
132  \param minLength: minimum distance between atoms to be
133  considered in a pair. Default is 1 bond.
134  \param maxLength: maximum distance between atoms to be
135  considered in a pair.
136  Default is maxPathLen-1 bonds.
137  \param fromAtoms: if provided, only atom pairs that involve
138  the specified atoms will be included in the
139  fingerprint
140  \param ignoreAtoms: if provided, any atom pairs that include
141  the specified atoms will not be included in the
142  fingerprint
143  \param atomInvariants: a list of invariants to use for the atom hashes
144  note: only the first \c codeSize bits of each
145  invariant are used.
146  \param nBitsPerEntry: number of bits to use in simulating counts
147  \param includeChirality: if set, chirality will be used in the atom invariants
148  (note: this is ignored if atomInvariants are
149  provided)
150  \param use2D: if set, the 2D (topological) distance matrix is used.
151  \param confId: the conformation to use if 3D distances are being used
152 
153  \return a pointer to the fingerprint. The client is
154  responsible for calling delete on this.
155 
156 */
159  const ROMol &mol, unsigned int nBits = 2048, unsigned int minLength = 1,
160  unsigned int maxLength = maxPathLen - 1,
161  const std::vector<std::uint32_t> *fromAtoms = 0,
162  const std::vector<std::uint32_t> *ignoreAtoms = 0,
163  const std::vector<std::uint32_t> *atomInvariants = 0,
164  unsigned int nBitsPerEntry = 4, bool includeChirality = false,
165  bool use2D = true, int confId = -1);
166 
167 //! returns the topological-torsion fingerprint for a molecule
168 /*!
169  The algorithm used is described here:
170  R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan;
171  "Topological Torsion: A New Molecular Descriptor for SAR Applications.
172  Comparison with Other Descriptors" JCICS 27, 82-85 (1987).
173 
174  \param mol: the molecule to be fingerprinted
175  \param targetSize: the number of atoms to include in the "torsions"
176  \param fromAtoms: if provided, only torsions that start or end at
177  the specified atoms will be included in the
178  fingerprint
179  \param ignoreAtoms: if provided, any torsions that include
180  the specified atoms will not be included in the
181  fingerprint
182  \param atomInvariants: a list of invariants to use for the atom hashes
183  note: only the first \c codeSize bits of each
184  invariant are used.
185  \param includeChirality: if set, chirality will be used in the atom invariants
186  (note: this is ignored if atomInvariants are
187  provided)
188 
189  \return a pointer to the fingerprint. The client is
190  responsible for calling delete on this.
191 
192 */
195  const ROMol &mol, unsigned int targetSize = 4,
196  const std::vector<std::uint32_t> *fromAtoms = 0,
197  const std::vector<std::uint32_t> *ignoreAtoms = 0,
198  const std::vector<std::uint32_t> *atomInvariants = 0,
199  bool includeChirality = false);
200 //! returns a hashed topological-torsion fingerprint for a molecule
201 /*!
202  The algorithm used is described here:
203  R. Nilakantan, N. Bauman, J. S. Dixon, R. Venkataraghavan;
204  "Topological Torsion: A New Molecular Descriptor for SAR Applications.
205  Comparison with Other Descriptors" JCICS 27, 82-85 (1987).
206 
207  \param mol: the molecule to be fingerprinted
208  \param nBits: number of bits to include in the fingerprint
209  \param targetSize: the number of atoms to include in the "torsions"
210  \param fromAtoms: if provided, only torsions that start or end at
211  the specified atoms will be included in the
212  fingerprint
213  \param ignoreAtoms: if provided, any torsions that include
214  the specified atoms will not be included in the
215  fingerprint
216  \param atomInvariants: a list of invariants to use for the atom hashes
217  note: only the first \c codeSize bits of each
218  invariant are used.
219  \param includeChirality: if set, chirality will be used in the atom invariants
220  (note: this is ignored if atomInvariants are
221  provided)
222 
223  \return a pointer to the fingerprint. The client is
224  responsible for calling delete on this.
225 
226 */
229  const ROMol &mol, unsigned int nBits = 2048, unsigned int targetSize = 4,
230  const std::vector<std::uint32_t> *fromAtoms = 0,
231  const std::vector<std::uint32_t> *ignoreAtoms = 0,
232  const std::vector<std::uint32_t> *atomInvariants = 0,
233  bool includeChirality = false);
234 //! returns a hashed topological-torsion fingerprint for a molecule as a bit
235 //! vector
236 /*!
237  \param mol: the molecule to be fingerprinted
238  \param nBits: number of bits to include in the fingerprint
239  \param targetSize: the number of atoms to include in the "torsions"
240  \param fromAtoms: if provided, only torsions that start or end at
241  the specified atoms will be included in the
242  fingerprint
243  \param ignoreAtoms: if provided, any torsions that include
244  the specified atoms will not be included in the
245  fingerprint
246  \param atomInvariants: a list of invariants to use for the atom hashes
247  note: only the first \c codeSize bits of each
248  invariant are used.
249  \param nBitsPerEntry: number of bits to use in simulating counts
250  \param includeChirality: if set, chirality will be used in the atom invariants
251  (note: this is ignored if atomInvariants are
252  provided)
253 
254  \return a pointer to the fingerprint. The client is
255  responsible for calling delete on this.
256 
257 */
260  const ROMol &mol, unsigned int nBits = 2048, unsigned int targetSize = 4,
261  const std::vector<std::uint32_t> *fromAtoms = 0,
262  const std::vector<std::uint32_t> *ignoreAtoms = 0,
263  const std::vector<std::uint32_t> *atomInvariants = 0,
264  unsigned int nBitsPerEntry = 4, bool includeChirality = false);
265 } // namespace AtomPairs
266 } // namespace RDKit
267 
268 #endif
RDKit::AtomPairs::getHashedAtomPairFingerprint
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * getHashedAtomPairFingerprint(const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen - 1, const std::vector< std::uint32_t > *fromAtoms=0, const std::vector< std::uint32_t > *ignoreAtoms=0, const std::vector< std::uint32_t > *atomInvariants=0, bool includeChirality=false, bool use2D=true, int confId=-1)
returns the hashed atom-pair fingerprint for a molecule
RDKit::AtomPairs::getHashedTopologicalTorsionFingerprint
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * getHashedTopologicalTorsionFingerprint(const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=0, const std::vector< std::uint32_t > *ignoreAtoms=0, const std::vector< std::uint32_t > *atomInvariants=0, bool includeChirality=false)
returns a hashed topological-torsion fingerprint for a molecule
RDKit::AtomPairs::atomPairsVersion
const std::string atomPairsVersion
Definition: AtomPairs.h:44
RDKit::ROMol
Definition: ROMol.h:171
RDKit::AtomPairs::getHashedTopologicalTorsionFingerprintAsBitVect
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getHashedTopologicalTorsionFingerprintAsBitVect(const ROMol &mol, unsigned int nBits=2048, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=0, const std::vector< std::uint32_t > *ignoreAtoms=0, const std::vector< std::uint32_t > *atomInvariants=0, unsigned int nBitsPerEntry=4, bool includeChirality=false)
returns a hashed topological-torsion fingerprint for a molecule as a bit vector
RDKit::AtomPairs::getAtomPairFingerprint
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::int32_t > * getAtomPairFingerprint(const ROMol &mol, unsigned int minLength, unsigned int maxLength, const std::vector< std::uint32_t > *fromAtoms=0, const std::vector< std::uint32_t > *ignoreAtoms=0, const std::vector< std::uint32_t > *atomInvariants=0, bool includeChirality=false, bool use2D=true, int confId=-1)
returns the atom-pair fingerprint for a molecule
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::AtomPairs::getHashedAtomPairFingerprintAsBitVect
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getHashedAtomPairFingerprintAsBitVect(const ROMol &mol, unsigned int nBits=2048, unsigned int minLength=1, unsigned int maxLength=maxPathLen - 1, const std::vector< std::uint32_t > *fromAtoms=0, const std::vector< std::uint32_t > *ignoreAtoms=0, const std::vector< std::uint32_t > *atomInvariants=0, unsigned int nBitsPerEntry=4, bool includeChirality=false, bool use2D=true, int confId=-1)
returns the hashed atom-pair fingerprint for a molecule as a bit vector
SparseIntVect.h
BitVects.h
Pulls in all the BitVect classes.
RDKit::AtomPairs::getTopologicalTorsionFingerprint
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< boost::int64_t > * getTopologicalTorsionFingerprint(const ROMol &mol, unsigned int targetSize=4, const std::vector< std::uint32_t > *fromAtoms=0, const std::vector< std::uint32_t > *ignoreAtoms=0, const std::vector< std::uint32_t > *atomInvariants=0, bool includeChirality=false)
returns the topological-torsion fingerprint for a molecule
RDKIT_FINGERPRINTS_EXPORT
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:242
RDKit::AtomPairs::maxPathLen
const unsigned int maxPathLen
Definition: FingerprintUtil.h:38
FingerprintUtil.h
RDKit::SparseIntVect
a class for efficiently storing sparse vectors of ints
Definition: SparseIntVect.h:28
ExplicitBitVect
a class for bit vectors that are densely occupied
Definition: ExplicitBitVect.h:29
export.h