RDKit
Open-source cheminformatics and machine learning.
MorganGenerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018 Boran Adas, Google Summer of Code
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_MORGANGEN_H_2018_07
13 #define RD_MORGANGEN_H_2018_07
14 
16 #include <cstdint>
17 
18 namespace RDKit {
19 
20 namespace MorganFingerprint {
21 
22 /**
23  \brief Default atom invariants generator for Morgan fingerprint, generates
24  ECFP-type invariants
25 
26  */
28  : public AtomInvariantsGenerator {
29  const bool df_includeRingMembership;
30 
31  public:
32  /**
33  \brief Construct a new MorganAtomInvGenerator object
34 
35  \param includeRingMembership : if set, whether or not the atom is in a ring
36  will be used in the invariant list.
37  */
38  MorganAtomInvGenerator(const bool includeRingMembership = true);
39 
40  std::vector<std::uint32_t> *getAtomInvariants(const ROMol &mol) const;
41 
42  std::string infoString() const;
43  MorganAtomInvGenerator *clone() const;
44 };
45 
46 /**
47  \brief Alternative atom invariants generator for Morgan fingerprint, generates
48  FCFP-type invariants
49 
50  */
52  : public AtomInvariantsGenerator {
53  std::vector<const ROMol *> *dp_patterns;
54 
55  public:
56  /**
57  \brief Construct a new MorganFeatureAtomInvGenerator object
58 
59  \param patterns : if provided should contain the queries used to assign
60  atom-types. if not provided, feature definitions adapted from reference:
61  Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998) will be used for
62  Donor, Acceptor, Aromatic, Halogen, Basic, Acidic.
63  */
64  MorganFeatureAtomInvGenerator(std::vector<const ROMol *> *patterns = nullptr);
65 
66  std::vector<std::uint32_t> *getAtomInvariants(const ROMol &mol) const;
67 
68  std::string infoString() const;
69  MorganFeatureAtomInvGenerator *clone() const;
70 };
71 
72 /**
73  \brief Bond invariants generator for Morgan fingerprint
74 
75  */
77  : public BondInvariantsGenerator {
78  const bool df_useBondTypes;
79  const bool df_useChirality;
80 
81  public:
82  /**
83  \brief Construct a new MorganBondInvGenerator object
84 
85  \param useBondTypes : if set, bond types will be included as a part of the
86  bond invariants
87  \param useChirality : if set, chirality information will be included as a
88  part of the bond invariants
89  */
90  MorganBondInvGenerator(const bool useBondTypes = true,
91  const bool useChirality = false);
92 
93  std::vector<std::uint32_t> *getBondInvariants(const ROMol &mol) const;
94 
95  std::string infoString() const;
96  MorganBondInvGenerator *clone() const;
98 };
99 
100 /**
101  \brief Class for holding Morgan fingerprint specific arguments
102 
103  */
104 template <typename OutputType>
106  : public FingerprintArguments<OutputType> {
107  public:
110  const unsigned int d_radius;
111 
112  OutputType getResultSize() const;
113 
114  std::string infoString() const;
115 
116  /**
117  \brief Construct a new MorganArguments object
118 
119  \param radius the number of iterations to grow the fingerprint
120  \param countSimulation if set, use count simulation while generating the
121  fingerprint
122  \param includeChirality if set, chirality information will be added to the
123  generated bit id, independently from bond invariants
124  \param onlyNonzeroInvariants if set, bits will only be set from atoms that
125  have a nonzero invariant
126  \param countBounds boundaries for count simulation, corresponding bit will
127  be set if the count is higher than the number provided for that spot
128  \param fpSize size of the generated fingerprint, does not affect the sparse
129  versions
130  */
131  MorganArguments(const unsigned int radius, const bool countSimulation = false,
132  const bool includeChirality = false,
133  const bool onlyNonzeroInvariants = false,
134  const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
135  const std::uint32_t fpSize = 2048);
136 };
137 
138 /**
139  \brief Class for holding the bit-id created from Morgan fingerprint
140  environments and the additional data necessary for extra outputs
141 
142  */
143 template <typename OutputType>
145  : public AtomEnvironment<OutputType> {
146  const OutputType d_code;
147  const unsigned int d_atomId;
148  const unsigned int d_layer;
149 
150  public:
151  OutputType getBitId(FingerprintArguments<OutputType> *arguments,
152  const std::vector<std::uint32_t> *atomInvariants,
153  const std::vector<std::uint32_t> *bondInvariants,
154  const AdditionalOutput *additionalOutput,
155  const bool hashResults = false) const;
156 
157  /**
158  \brief Construct a new MorganAtomEnv object
159 
160  \param code bit id generated from this environment
161  \param atomId atom id of the atom at the center of this environment
162  \param layer radius of this environment
163  */
164  MorganAtomEnv(const std::uint32_t code, const unsigned int atomId,
165  const unsigned int layer);
166 };
167 
168 /**
169  \brief Class that generates atom environments for Morgan fingerprint
170 
171  */
172 template <typename OutputType>
174  : public AtomEnvironmentGenerator<OutputType> {
175  public:
176  std::vector<AtomEnvironment<OutputType> *> getEnvironments(
177  const ROMol &mol, FingerprintArguments<OutputType> *arguments,
178  const std::vector<std::uint32_t> *fromAtoms,
179  const std::vector<std::uint32_t> *ignoreAtoms, const int confId,
180  const AdditionalOutput *additionalOutput,
181  const std::vector<std::uint32_t> *atomInvariants,
182  const std::vector<std::uint32_t> *bondInvariants,
183  const bool hashResults = false) const;
184 
185  std::string infoString() const;
186 };
187 
188 /**
189  \brief Get a fingerprint generator for Morgan fingerprint
190 
191  \tparam OutputType determines the size of the bitIds and the result, can be
192  32 or 64 bit unsigned integer
193 
194  \param radius the number of iterations to grow the fingerprint
195 
196  \param countSimulation if set, use count simulation while generating the
197  fingerprint
198 
199  \param includeChirality if set, chirality information will be added to the
200  generated bit id; this sets the includeChirality flag for both
201  MorganArguments and the default bond generator MorganBondInvGenerator
202 
203  \param onlyNonzeroInvariants if set, bits will only be set from atoms that
204  have a nonzero invariant
205 
206  \param countBounds boundaries for count simulation, corresponding bit will be
207  set if the count is higher than the number provided for that spot
208 
209  \param fpSize size of the generated fingerprint, does not affect the sparse
210  versions
211  \param useBondTypes if set, bond types will be included as a part of the
212  default bond invariants
218  \param atomInvariantsGenerator custom atom invariants generator to use
219  \param bondInvariantsGenerator custom bond invariants generator to use
220  \param ownsAtomInvGen if set atom invariants generator is destroyed with the
221  fingerprint generator
222  \param ownsBondInvGen if set bond invariants generator is destroyed with the
223  fingerprint generator
224 
225  \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
226  */
227 template <typename OutputType>
229  const unsigned int radius, const bool countSimulation = false,
230  const bool includeChirality = false, const bool useBondTypes = true,
231  const bool onlyNonzeroInvariants = false,
232  AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
233  BondInvariantsGenerator *bondInvariantsGenerator = nullptr,
234  const std::uint32_t fpSize = 2048,
235  const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
236  const bool ownsAtomInvGen = false, const bool ownsBondInvGen = false);
237 
238 } // namespace MorganFingerprint
239 } // namespace RDKit
240 
241 #endif
RDKit::MorganFingerprint::MorganArguments
Class for holding Morgan fingerprint specific arguments.
Definition: MorganGenerator.h:105
RDKit::FingerprintGenerator
class that generates same fingerprint style for different output formats
Definition: FingerprintGenerator.h:239
RDKit::MorganFingerprint::MorganArguments::df_onlyNonzeroInvariants
const bool df_onlyNonzeroInvariants
Definition: MorganGenerator.h:109
RDKit::AdditionalOutput
Definition: FingerprintGenerator.h:23
RDKit::MorganFingerprint::MorganEnvGenerator
Class that generates atom environments for Morgan fingerprint.
Definition: MorganGenerator.h:173
RDKit::MorganFingerprint::MorganArguments::df_includeChirality
const bool df_includeChirality
Definition: MorganGenerator.h:108
RDKit::MorganFingerprint::getMorganGenerator
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getMorganGenerator(const unsigned int radius, const bool countSimulation=false, const bool includeChirality=false, const bool useBondTypes=true, const bool onlyNonzeroInvariants=false, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, BondInvariantsGenerator *bondInvariantsGenerator=nullptr, const std::uint32_t fpSize=2048, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, const bool ownsAtomInvGen=false, const bool ownsBondInvGen=false)
Get a fingerprint generator for Morgan fingerprint.
RDKit::MorganFingerprint::MorganFeatureAtomInvGenerator
Alternative atom invariants generator for Morgan fingerprint, generate FCFP-type invariants.
Definition: MorganGenerator.h:51
RDKit::ROMol
Definition: ROMol.h:171
RDKit::AtomInvariantsGenerator
abstract base class for atom invariants generators
Definition: FingerprintGenerator.h:177
RDKit::BondInvariantsGenerator
abstract base class for bond invariants generators
Definition: FingerprintGenerator.h:207
FingerprintGenerator.h
RDKit::MorganFingerprint::MorganBondInvGenerator
Bond invariants generator for Morgan fingerprint.
Definition: MorganGenerator.h:76
RDKit::AtomEnvironment
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
Definition: FingerprintGenerator.h:96
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::MorganFingerprint::MorganBondInvGenerator::~MorganBondInvGenerator
~MorganBondInvGenerator()
Definition: MorganGenerator.h:97
RDKit::AtomEnvironmentGenerator
abstract base class that generates atom-environments from a molecule
Definition: FingerprintGenerator.h:123
RDKit::MorganFingerprint::MorganAtomInvGenerator
Default atom invariants generator for Morgan fingerprint, generates ECFP-type invariants.
Definition: MorganGenerator.h:27
RDKIT_FINGERPRINTS_EXPORT
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:242
RDKit::MorganFingerprint::MorganAtomEnv
Class for holding the bit-id created from Morgan fingerprint environments and the additional data nec...
Definition: MorganGenerator.h:144
RDKit::MorganFingerprint::MorganArguments::d_radius
const unsigned int d_radius
Definition: MorganGenerator.h:110
RDKit::FingerprintArguments
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
Definition: FingerprintGenerator.h:52
export.h