RDKit
Open-source cheminformatics and machine learning.
MolStandardize.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2018 Susan H. Leung
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 /*! \file MolStandardize.h
11 
12  \brief Defines the CleanupParameters and some convenience functions.
13 
14 */
15 #include <RDGeneral/export.h>
16 #ifndef __RD_MOLSTANDARDIZE_H__
17 #define __RD_MOLSTANDARDIZE_H__
18 
19 #include <string>
20 #include <GraphMol/RDKitBase.h>
21 
22 namespace RDKit {
23 class RWMol;
24 class ROMol;
25 
26 namespace MolStandardize {
27 
28 //! The CleanupParameters structure defines the default parameters for the
29 // cleanup process and also allows the user to customize the process by changing
30 // the parameters.
31 /*!
32 
33  <b>Notes:</b>
34  - To customize the parameters, the structure must be initialized first.
35  (Another on the TODO list)
36  - For this project, not all the parameters have been revealed.
37  (TODO)
38 
39 */
41  // TODO reveal all parameters
42  private:
43  const char *rdbase_cstr = std::getenv("RDBASE");
44 
45  public:
46  std::string rdbase = rdbase_cstr != nullptr ? rdbase_cstr : "";
47  std::string normalizations;
48  std::string acidbaseFile;
49  std::string fragmentFile;
50  // std::vector<std::string> chargeCorrections;
51  std::string tautomerTransforms;
52  // std::vector<std::string> TautomerScores;
53  int maxRestarts; // The maximum number of times to attempt to apply the
54  // series of normalizations (default 200).
55  int maxTautomers; // The maximum number of tautomers to enumerate (default
56  // 1000).
57  bool preferOrganic; // Whether to prioritize organic fragments when choosing
58  // fragment parent (default False).
59  bool doCanonical; // whether or not to apply normalizations in a canonical
60  // order
61 
63  : // TODO
64  // normalizations(""),//this->DEFAULT_TRANSFORMS),
65  normalizations(rdbase + "/Data/MolStandardize/normalizations.txt"),
66  acidbaseFile(rdbase + "/Data/MolStandardize/acid_base_pairs.txt"),
67  fragmentFile(rdbase + "/Data/MolStandardize/fragmentPatterns.txt"),
68  // chargeCorrections()
69  tautomerTransforms(rdbase +
70  "/Data/MolStandardize/tautomerTransforms.in"),
71  // TautomerScores()
72  maxRestarts(200),
73  maxTautomers(1000),
74  preferOrganic(false),
75  doCanonical(true) {}
76 };
77 
78 RDKIT_MOLSTANDARDIZE_EXPORT extern const CleanupParameters
80 
81 //! The cleanup function is equivalent to the
82 // molvs.Standardizer().standardize(mol) function. It calls the same steps,
83 // namely: RemoveHs, RDKit SanitizeMol, MetalDisconnector, Normalizer,
84 // Reionizer, RDKit AssignStereochemistry.
86  const RWMol &mol,
87  const CleanupParameters &params = defaultCleanupParameters);
88 
89 //! TODO not yet finished!
91  RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
92 
93 //! Returns the fragment parent of a given molecule. The fragment parent is the
94 // largest organic covalent unit in the molecule.
96  const RWMol &mol,
97  const CleanupParameters &params = defaultCleanupParameters,
98  bool skip_standardize = false);
99 
100 // TODO
102  RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
103 
104 // TODO
106  RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
107 
108 //! Returns the charge parent of a given molecule. The charge parent is the
109 //! uncharged
110 //! version of the fragment parent.
112  const RWMol &mol,
113  const CleanupParameters &params = defaultCleanupParameters,
114  bool skip_standardize = false);
115 
116 // TODO Need to do tautomers first
118  RWMol &mol, const CleanupParameters &params = defaultCleanupParameters);
119 
120 //! Works the same as Normalizer().normalize(mol)
122  const RWMol *mol,
123  const CleanupParameters &params = defaultCleanupParameters);
124 
125 //! Works the same as Reionizer().reionize(mol)
127  const RWMol *mol,
128  const CleanupParameters &params = defaultCleanupParameters);
129 
130 //! Convenience function for quickly standardizing a single SMILES string.
131 // Returns a standardized canonical SMILES string given a SMILES string.
133  const std::string &smiles);
134 
135 //! TODO
137  const std::string &smiles,
138  const CleanupParameters &params = defaultCleanupParameters);
139 }; // namespace MolStandardize
140 } // namespace RDKit
141 #endif
RDKit::MolStandardize::enumerateTautomerSmiles
RDKIT_MOLSTANDARDIZE_EXPORT std::vector< std::string > enumerateTautomerSmiles(const std::string &smiles, const CleanupParameters &params=defaultCleanupParameters)
TODO.
RDKit::MolStandardize::fragmentParent
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * fragmentParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
Returns the fragment parent of a given molecule. The fragment parent is the largest organic covalent unit in the molecule.
RDKit::RWMol
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
RDKit::MolStandardize::stereoParent
RDKIT_MOLSTANDARDIZE_EXPORT void stereoParent(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
RDKit::MolStandardize::CleanupParameters::preferOrganic
bool preferOrganic
Definition: MolStandardize.h:57
RDKit::MolStandardize::standardizeSmiles
RDKIT_MOLSTANDARDIZE_EXPORT std::string standardizeSmiles(const std::string &smiles)
Convenience function for quickly standardizing a single SMILES string.
RDKit::MolStandardize::chargeParent
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * chargeParent(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters, bool skip_standardize=false)
RDKit::MolStandardize::CleanupParameters::maxRestarts
int maxRestarts
Definition: MolStandardize.h:53
RDKit::MolStandardize::isotopeParent
RDKIT_MOLSTANDARDIZE_EXPORT void isotopeParent(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
RDKit::MolStandardize::CleanupParameters::doCanonical
bool doCanonical
Definition: MolStandardize.h:59
RDKit::MolStandardize::superParent
RDKIT_MOLSTANDARDIZE_EXPORT void superParent(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
RDKitBase.h
pulls in the core RDKit functionality
RDKit::MolStandardize::CleanupParameters::fragmentFile
std::string fragmentFile
Definition: MolStandardize.h:49
RDKit::MolStandardize::cleanup
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * cleanup(const RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
The cleanup function is equivalent to the molvs.Standardizer().standardize(mol) function.
RDKit::MolStandardize::CleanupParameters::CleanupParameters
CleanupParameters()
Definition: MolStandardize.h:62
RDKit::MolStandardize::CleanupParameters::normalizations
std::string normalizations
Definition: MolStandardize.h:47
RDKit::MolStandardize::normalize
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * normalize(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Normalizer().normalize(mol)
RDKit::MolStandardize::CleanupParameters
The CleanupParameters structure defines the default parameters for the cleanup process and also allows the user to customize the process by changing the parameters.
Definition: MolStandardize.h:40
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::MolStandardize::tautomerParent
RDKIT_MOLSTANDARDIZE_EXPORT void tautomerParent(RWMol &mol, const CleanupParameters &params=defaultCleanupParameters)
TODO not yet finished!
RDKit::MolStandardize::reionize
RDKIT_MOLSTANDARDIZE_EXPORT RWMol * reionize(const RWMol *mol, const CleanupParameters &params=defaultCleanupParameters)
Works the same as Reionizer().reionize(mol)
RDKit::MolStandardize::CleanupParameters::tautomerTransforms
std::string tautomerTransforms
Definition: MolStandardize.h:51
RDKit::MolStandardize::CleanupParameters::acidbaseFile
std::string acidbaseFile
Definition: MolStandardize.h:48
RDKIT_MOLSTANDARDIZE_EXPORT
#define RDKIT_MOLSTANDARDIZE_EXPORT
Definition: export.h:437
RDKit::MolStandardize::CleanupParameters::maxTautomers
int maxTautomers
Definition: MolStandardize.h:55
RDKit::MolStandardize::defaultCleanupParameters
const RDKIT_MOLSTANDARDIZE_EXPORT CleanupParameters defaultCleanupParameters
Definition: Fragment.h:25
export.h