RDKit
Open-source cheminformatics and machine learning.
ChemTransforms.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2006-2012 Greg Landrum
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_CHEMTRANSFORMS_H__
12 #define _RD_CHEMTRANSFORMS_H__
13 
14 #include <boost/smart_ptr.hpp>
15 #include <vector>
16 #include <iostream>
17 
19 #include "MolFragmenter.h"
20 
21 namespace RDKit {
22 class ROMol;
23 typedef boost::shared_ptr<ROMol> ROMOL_SPTR;
24 
25 //! \brief Returns a copy of an ROMol with the atoms and bonds that
26 //! match a pattern removed.
27 /*!
28  \param mol the ROMol of interest
29  \param query the query ROMol
30  \param onlyFrags if this is set, atoms will only be removed if
31  the entire fragment in which they are found is
32  matched by the query.
33  \param useChirality - if set, match the query using chirality
34 
35  \return a copy of \c mol with the matching atoms and bonds (if any)
36  removed.
37 */
39  const ROMol &query,
40  bool onlyFrags = false,
41  bool useChirality = false);
42 
43 //! \brief Returns a list of copies of an ROMol with the atoms and bonds that
44 //! match a pattern replaced with the atoms contained in another molecule.
45 /*!
46  Bonds are created between the joining atom in the existing molecule
47  and the atoms in the new molecule. So, using SMILES instead of molecules:
48  replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]') ->
49  ['[X]NCCNC(=O)O','OC(=O)NCCN[X]']
50  replaceSubstructs('OC(=O)NCCNC(=O)O','C(=O)O','[X]',true) ->
51  ['[X]NCCN[X]']
52  Chains should be handled "correctly":
53  replaceSubstructs('CC(=O)C','C(=O)','[X]') ->
54  ['C[X]C']
55  As should rings:
56  replaceSubstructs('C1C(=O)C1','C(=O)','[X]') ->
57  ['C1[X]C1']
58  And higher order branches:
59  replaceSubstructs('CC(=O)(C)C','C(=O)','[X]') ->
60  ['C[X](C)C']
61  Note that the client is responsible for making sure that the
62  resulting molecule actually makes sense - this function does not
63  perform sanitization.
64 
65  \param mol the ROMol of interest
66  \param query the query ROMol
67  \param replacement the ROMol to be inserted
68  \param replaceAll if this is true, only a single result, with all
69  occurrences
70  of the substructure replaced, will be returned.
71  \param replacementConnectionPoint index of the atom in the replacement
72  that
73  the bond should be made to
74  \param useChirality - if set, match the query using chirality
75 
76 
77  \return a vector of pointers to copies of \c mol with the matching atoms
78  and bonds (if any) replaced
79 
80 */
81 RDKIT_CHEMTRANSFORMS_EXPORT std::vector<ROMOL_SPTR> replaceSubstructs(
82  const ROMol &mol, const ROMol &query, const ROMol &replacement,
83  bool replaceAll = false, unsigned int replacementConnectionPoint = 0,
84  bool useChirality = false);
85 
86 //! \brief Returns a copy of an ROMol with the atoms and bonds that
87 //! don't fall within a substructure match removed.
88 //!
89 //! dummy atoms are left to indicate attachment points.
90 //!
91 /*!
92  \param mol the ROMol of interest
93  \param coreQuery a query ROMol to be used to match the core
94  \param useChirality - if set, match the coreQuery using chirality
95 
96  \return a copy of \c mol with the non-matching atoms and bonds (if any)
97  removed and dummies at the connection points.
98 */
99 
101  const ROMol &coreQuery,
102  bool useChirality = false);
103 
104 //! \brief Returns a copy of an ROMol with the atoms and bonds that
105 //! are referenced by the MatchVector removed.
106 //! MatchVector must be defined between mol and the specified core.
107 //!
108 //! dummy atoms are left to indicate attachment points.
109 //! These dummy atoms can be labeled either by the matching index
110 //! in the query or by an arbitrary "first match" found.
111 //! Additional matching options are given below.
112 //!
113 /*!
114  Note that this is essentially identical to the replaceSidechains function,
115  except we
116  invert the query and replace the atoms that *do* match the query.
117 
118  \param mol - the ROMol of interest
119  \param core - the core being matched against
120  \param matchVect - a matchVect of the type returned by Substructure Matching
121  \param replaceDummies - if set, atoms matching dummies in the core will also
122  be replaced
123  \param labelByIndex - if set, the dummy atoms at attachment points are
124  labelled with the index+1 of the corresponding atom in the core
125  \param requireDummyMatch - if set, only side chains that are connected to
126  atoms in the core that have attached dummies will be considered. Molecules
127  that have sidechains attached at other points will be rejected (NULL returned).
128  \param useChirality - if set, match the coreQuery using chirality
129 
130  \return a copy of \c mol with the matching atoms and bonds (if any)
131  removed and dummies at the connection points. The client is
132  responsible
133  for deleting this molecule. If the core query is not matched, NULL
134  is returned.
135 */
137  const ROMol &core,
138  const MatchVectType &matchVect,
139  bool replaceDummies = true,
140  bool labelByIndex = false,
141  bool requireDummyMatch = false);
142 
143 //! \brief Returns a copy of an ROMol with the atoms and bonds that
144 //! do fall within a substructure match removed.
145 //!
146 //! dummy atoms are left to indicate attachment points.
147 //!
148 /*!
149  Note that this is essentially identical to the replaceSidechains function,
150  except we
151  invert the query and replace the atoms that *do* match the query.
152 
153  \param mol - the ROMol of interest
154  \param coreQuery - a query ROMol to be used to match the core
155  \param replaceDummies - if set, atoms matching dummies in the core will also
156  be replaced
157  \param labelByIndex - if set, the dummy atoms at attachment points are
158  labelled with the
159  index+1 of the corresponding atom in the core
160  \param requireDummyMatch - if set, only side chains that are connected to
161  atoms in
162  the core that have attached dummies will be
163  considered.
164  Molecules that have sidechains that are attached
165  at other points will be rejected (NULL returned).
166  \param useChirality - if set, match the coreQuery using chirality
167 
168  \return a copy of \c mol with the matching atoms and bonds (if any)
169  removed and dummies at the connection points. The client is
170  responsible
171  for deleting this molecule. If the core query is not matched, NULL
172  is returned.
173 */
175  const ROMol &coreQuery,
176  bool replaceDummies = true,
177  bool labelByIndex = false,
178  bool requireDummyMatch = false,
179  bool useChirality = false);
180 
181 //! \brief Carries out a Murcko decomposition on the molecule provided
182 //!
183 /*!
184 
185  \param mol - the ROMol of interest
186 
187  \return a new ROMol with the Murcko scaffold
188  The client is responsible for deleting this molecule.
189 */
191 
192 //! \brief Combines two molecules to create a new one
193 //!
194 /*!
195 
196  \param mol1 - the first ROMol to be combined
197  \param mol2 - the second ROMol to be combined
198  \param offset - a constant offset to be added to every
199  atom position in mol2
200 
201  \return a new ROMol with the two molecules combined.
202  The new molecule has not been sanitized.
203  The client is responsible for deleting this molecule.
204 */
206  const ROMol &mol1, const ROMol &mol2,
207  RDGeom::Point3D offset = RDGeom::Point3D(0, 0, 0));
208 
209 //! \brief Adds named recursive queries to a molecule's atoms based on atom
210 //! labels
211 //!
212 /*!
213 
214  \param mol - the molecule to be modified
215  \param queries - the dictionary of named queries to add
216  \param propName - the atom property to use to get query names
217  \param reactantLabels - to store pairs of (atom index, query string)
218 
219 
220  NOTES:
221  - existing query information, if present, will be supplemented (AND logic)
222  - non-query atoms will be replaced with query atoms using only the query
223  logic
224  - query names can be present as comma separated lists, they will then
225  be combined using OR logic.
226  - throws a KeyErrorException if a particular query name is not present
227  in \c queries
228 
229 */
231  ROMol &mol, const std::map<std::string, ROMOL_SPTR> &queries,
232  const std::string &propName,
233  std::vector<std::pair<unsigned int, std::string>> *reactantLabels = NULL);
234 
235 //! \brief parses a query definition file and sets up a set of definitions
236 //! suitable for use by addRecursiveQueries()
237 /*!
238 
239  \param filename - the name of the file to be read
240  \param queryDefs - the dictionary of named queries (return value)
241  \param standardize - if true, query names will be converted to lower
242  case
243  \param delimiter - the line delimiter in the file
244  \param comment - text used to recognize comment lines
245  \param nameColumn - column with the names of queries
246  \param smartsColumn - column with the SMARTS definitions of the queries
247 
248 */
250  const std::string &filename, std::map<std::string, ROMOL_SPTR> &queryDefs,
251  bool standardize = true, const std::string &delimiter = "\t",
252  const std::string &comment = "//", unsigned int nameColumn = 0,
253  unsigned int smartsColumn = 1);
254 //! \overload
256  std::istream *inStream, std::map<std::string, ROMOL_SPTR> &queryDefs,
257  bool standardize = true, const std::string &delimiter = "\t",
258  const std::string &comment = "//", unsigned int nameColumn = 0,
259  unsigned int smartsColumn = 1);
260 //! \brief equivalent to parseQueryDefFile() but the query definitions are
261 //! explicitly passed in
263  const std::string &queryDefText,
264  std::map<std::string, ROMOL_SPTR> &queryDefs, bool standardize = true,
265  const std::string &delimiter = "\t", const std::string &comment = "//",
266  unsigned int nameColumn = 0, unsigned int smartsColumn = 1);
267 } // namespace RDKit
268 
269 #endif
RDKit::parseQueryDefFile
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefFile(const std::string &filename, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, const std::string &delimiter="\t", const std::string &comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
parses a query definition file and sets up a set of definitions suitable for use by addRecursiveQueri...
RDKit::replaceSubstructs
RDKIT_CHEMTRANSFORMS_EXPORT std::vector< ROMOL_SPTR > replaceSubstructs(const ROMol &mol, const ROMol &query, const ROMol &replacement, bool replaceAll=false, unsigned int replacementConnectionPoint=0, bool useChirality=false)
Returns a list of copies of an ROMol with the atoms and bonds that match a pattern replaced with the ...
RDKit::combineMols
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * combineMols(const ROMol &mol1, const ROMol &mol2, RDGeom::Point3D offset=RDGeom::Point3D(0, 0, 0))
Combines two molecules to create a new one.
RDGeom::Point3D
Definition: point.h:46
RDKit::parseQueryDefText
RDKIT_CHEMTRANSFORMS_EXPORT void parseQueryDefText(const std::string &queryDefText, std::map< std::string, ROMOL_SPTR > &queryDefs, bool standardize=true, const std::string &delimiter="\t", const std::string &comment="//", unsigned int nameColumn=0, unsigned int smartsColumn=1)
equivalent to parseQueryDefFile() but the query definitions are explicitly passed in
MolFragmenter.h
RDKit::replaceSidechains
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * replaceSidechains(const ROMol &mol, const ROMol &coreQuery, bool useChirality=false)
Returns a copy of an ROMol with the atoms and bonds that don't fall within a substructure match remov...
RDKit::ROMol
Definition: ROMol.h:171
RDKit::addRecursiveQueries
RDKIT_CHEMTRANSFORMS_EXPORT void addRecursiveQueries(ROMol &mol, const std::map< std::string, ROMOL_SPTR > &queries, const std::string &propName, std::vector< std::pair< unsigned int, std::string >> *reactantLabels=NULL)
Adds named recursive queries to a molecule's atoms based on atom labels.
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::deleteSubstructs
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * deleteSubstructs(const ROMol &mol, const ROMol &query, bool onlyFrags=false, bool useChirality=false)
Returns a copy of an ROMol with the atoms and bonds that match a pattern removed.
RDKit::replaceCore
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * replaceCore(const ROMol &mol, const ROMol &core, const MatchVectType &matchVect, bool replaceDummies=true, bool labelByIndex=false, bool requireDummyMatch=false)
Returns a copy of an ROMol with the atoms and bonds that are referenced by the MatchVector removed....
RDKIT_CHEMTRANSFORMS_EXPORT
#define RDKIT_CHEMTRANSFORMS_EXPORT
Definition: export.h:73
RDKit::MurckoDecompose
RDKIT_CHEMTRANSFORMS_EXPORT ROMol * MurckoDecompose(const ROMol &mol)
Carries out a Murcko decomposition on the molecule provided.
RDKit::MatchVectType
std::vector< std::pair< int, int > > MatchVectType
used to return matches from substructure searching. The format is (queryAtomIdx, molAtomIdx)
Definition: FragFPGenerator.h:24
RDKit::ROMOL_SPTR
boost::shared_ptr< ROMol > ROMOL_SPTR
Definition: ChemTransforms.h:22
SubstructMatch.h
export.h