RDKit
Open-source cheminformatics and machine learning.
RDKitFPGenerator.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018 Boran Adas, Google Summer of Code
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef RD_RDFINGERPRINTGEN_H_2018_07
13 #define RD_RDFINGERPRINTGEN_H_2018_07
14 
16 
17 namespace RDKit {
18 namespace RDKitFP {
19 
20 template <typename OutputType>  // NOTE(review): listing line 21 (the class-head that names RDKitFPArguments) is absent from this dump; the symbol index at the end of the page places it at RDKitFPGenerator.h:21
22  : public FingerprintArguments<OutputType> {  // extends the shared fingerprint-argument base with the path-specific options below
23  public:
24  const unsigned int d_minPath;  // presumably the constructor's minPath (minimum path length, in bonds) - confirm in the .cpp
25  const unsigned int d_maxPath;  // presumably the constructor's maxPath (maximum path length, in bonds) - confirm in the .cpp
26  const bool df_useHs;  // presumably the constructor's useHs flag - confirm in the .cpp
27  const bool df_branchedPaths;  // presumably the constructor's branchedPaths flag - confirm in the .cpp
28  const bool df_useBondOrder;  // presumably the constructor's useBondOrder flag - confirm in the .cpp
29 
30  OutputType getResultSize() const;  // NOTE(review): not marked override; presumably implements a virtual of FingerprintArguments - confirm against the base class
31 
32  std::string infoString() const;  // returns a std::string; NOTE(review): exact contents are defined outside this header
33 
34  /**
35  \brief Construct a new RDKitFPArguments object
36 
37  \param minPath the minimum path length (in bonds) to be included
38  \param maxPath the maximum path length (in bonds) to be included
39  \param useHs toggles inclusion of Hs in paths (if the molecule has
40  explicit Hs)
41  \param branchedPaths toggles generation of branched subgraphs, not just
42  linear paths
43  \param useBondOrder toggles inclusion of bond orders in the path hashes
44  \param countSimulation if set, use count simulation while
45  generating the fingerprint
46  \param countBounds boundaries for count simulation, corresponding bit will
47  be set if the count is higher than the number provided for that spot
48  \param fpSize size of the generated fingerprint, does not affect the sparse
49  versions
50  \param numBitsPerFeature controls the number of bits that are set for each
51  path/subgraph found
52 
53  */
54  RDKitFPArguments(unsigned int minPath, unsigned int maxPath, bool useHs,
55  bool branchedPaths, bool useBondOrder, bool countSimulation,
56  const std::vector<std::uint32_t> countBounds,  // taken by value (const copy), not by reference
57  std::uint32_t fpSize, std::uint32_t numBitsPerFeature);
58 };
59 
61  : public AtomInvariantsGenerator {  // NOTE(review): listing line 60 (the class-head) is absent from this dump; the symbol index at the end of the page places RDKitFPAtomInvGenerator at RDKitFPGenerator.h:60
62  public:
63  std::vector<std::uint32_t> *getAtomInvariants(const ROMol &mol) const;  // returns a raw pointer to a vector; NOTE(review): ownership of the result is not visible here - confirm who frees it
64 
65  std::string infoString() const;  // returns a std::string; NOTE(review): exact contents are defined outside this header
66  RDKitFPAtomInvGenerator *clone() const;  // returns a raw pointer of this concrete type; NOTE(review): presumably a new copy the caller must manage - confirm
67 };
68 
69 template <typename OutputType>  // NOTE(review): listing line 70 (the class-head that names RDKitFPAtomEnv) is absent from this dump; the symbol index at the end of the page places it at RDKitFPGenerator.h:70
71  : public AtomEnvironment<OutputType> {
72  const OutputType d_bitId;  // declared before any access specifier; presumably holds the constructor's bitId - confirm in the .cpp
73  const boost::dynamic_bitset<> d_atomsInPath;  // presumably holds the constructor's atomsInPath (atoms in this environment) - confirm in the .cpp
74 
75  public:
76  OutputType getBitId(FingerprintArguments<OutputType> *arguments,  // NOTE(review): how the pointer arguments are used is defined outside this header
77  const std::vector<std::uint32_t> *atomInvariants,
78  const std::vector<std::uint32_t> *bondInvariants,
79  const AdditionalOutput *additionalOutput,
80  bool hashResults = false) const;  // hashResults defaults to false
81 
82  /**
83  \brief Construct a new RDKitFPAtomEnv object
84 
85  \param bitId bitId generated for this environment
86  \param atomsInPath holds atoms in this environment to set additional output
87 
88  */
89  RDKitFPAtomEnv(const OutputType bitId,
90  const boost::dynamic_bitset<> &atomsInPath);
91 };
92 
93 template <typename OutputType>  // NOTE(review): listing line 94 (the class-head that names RDKitFPEnvGenerator) is absent from this dump; the symbol index at the end of the page places it at RDKitFPGenerator.h:94
95  : public AtomEnvironmentGenerator<OutputType> {
96  public:
97  std::vector<AtomEnvironment<OutputType> *> getEnvironments(  // returns raw AtomEnvironment pointers; NOTE(review): ownership of the elements is not visible here - confirm who deletes them
98  const ROMol &mol, FingerprintArguments<OutputType> *arguments,
99  const std::vector<std::uint32_t> *fromAtoms,
100  const std::vector<std::uint32_t> *ignoreAtoms, int confId,
101  const AdditionalOutput *additionalOutput,
102  const std::vector<std::uint32_t> *atomInvariants,
103  const std::vector<std::uint32_t> *bondInvariants,
104  bool hashResults = false) const;  // hashResults defaults to false
105 
106  std::string infoString() const;  // returns a std::string; NOTE(review): exact contents are defined outside this header
107 };
108 
109 /**
110  \brief Get an RDKit fingerprint generator with given parameters
111 
112  \tparam OutputType determines the size of the bitIds and the result, can be 32
113  or 64 bit unsigned integer
114  \param minPath the minimum path length (in bonds) to be included
115  \param maxPath the maximum path length (in bonds) to be included
116  \param useHs toggles inclusion of Hs in paths (if the molecule has
117  explicit Hs)
118  \param branchedPaths toggles generation of branched subgraphs, not just
119  linear paths
120  \param useBondOrder toggles inclusion of bond orders in the path hashes
121  \param atomInvariantsGenerator custom atom invariants generator to use
122  \param countSimulation if set, use count simulation while
123  generating the fingerprint
124  \param countBounds boundaries for count simulation, corresponding bit will be
125  set if the count is higher than the number provided for that spot
126  \param fpSize size of the generated fingerprint, does not affect the sparse
127  versions
128  \param numBitsPerFeature controls the number of bits that are set for each
129  path/subgraph found
130  \param ownsAtomInvGen if set atom invariants generator is destroyed with the
131  fingerprint generator
132 
133  \return FingerprintGenerator<OutputType>* that generates RDKit fingerprints
134  */
135 template <typename OutputType>
136 RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator<OutputType> *getRDKitFPGenerator(
137  unsigned int minPath = 1, unsigned int maxPath = 7, bool useHs = true,
138  bool branchedPaths = true, bool useBondOrder = true,
139  AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
140  bool countSimulation = false,
141  const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
142  std::uint32_t fpSize = 2048, std::uint32_t numBitsPerFeature = 2,
143  bool ownsAtomInvGen = false);
144 
145 } // namespace RDKitFP
146 } // namespace RDKit
147 
148 #endif
RDKit::RDKitFP::RDKitFPAtomEnv
Definition: RDKitFPGenerator.h:70
RDKit::FingerprintGenerator
class that generates same fingerprint style for different output formats
Definition: FingerprintGenerator.h:239
RDKit::RDKitFP
@ RDKitFP
Definition: ReactionFingerprints.h:47
RDKit::RDKitFP::RDKitFPArguments::df_useBondOrder
const bool df_useBondOrder
Definition: RDKitFPGenerator.h:28
RDKit::RDKitFP::RDKitFPEnvGenerator
Definition: RDKitFPGenerator.h:94
RDKit::AdditionalOutput
Definition: FingerprintGenerator.h:23
RDKit::RDKitFP::RDKitFPArguments::df_useHs
const bool df_useHs
Definition: RDKitFPGenerator.h:26
RDKit::RDKitFP::RDKitFPArguments::df_branchedPaths
const bool df_branchedPaths
Definition: RDKitFPGenerator.h:27
RDKit::ROMol
Definition: ROMol.h:171
RDKit::AtomInvariantsGenerator
abstract base class for atom invariants generators
Definition: FingerprintGenerator.h:177
RDKit::RDKitFP::RDKitFPArguments::d_maxPath
const unsigned int d_maxPath
Definition: RDKitFPGenerator.h:25
RDKit::RDKitFP::RDKitFPArguments::d_minPath
const unsigned int d_minPath
Definition: RDKitFPGenerator.h:24
FingerprintGenerator.h
RDKit::RDKitFP::getRDKitFPGenerator
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getRDKitFPGenerator(unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, bool countSimulation=false, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, std::uint32_t numBitsPerFeature=2, bool ownsAtomInvGen=false)
Get an RDKit fingerprint generator with given parameters.
RDKit::AtomEnvironment
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
Definition: FingerprintGenerator.h:96
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::AtomEnvironmentGenerator
abstract base class that generates atom-environments from a molecule
Definition: FingerprintGenerator.h:123
RDKIT_FINGERPRINTS_EXPORT
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:242
RDKit::RDKitFP::RDKitFPAtomInvGenerator
Definition: RDKitFPGenerator.h:60
RDKit::RDKitFP::RDKitFPArguments
Definition: RDKitFPGenerator.h:21
RDKit::FingerprintArguments
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
Definition: FingerprintGenerator.h:52
export.h