RDKit
Open-source cheminformatics and machine learning.
FileParsers.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2013 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef _RD_FILEPARSERS_H
12 #define _RD_FILEPARSERS_H
13 
14 #include <RDGeneral/types.h>
15 #include <GraphMol/RDKitBase.h>
16 
17 #include <string>
18 #include <iostream>
19 #include <vector>
20 #include <exception>
21 
22 #include <boost/shared_ptr.hpp>
23 
24 namespace RDKit {
25 const int MOLFILE_MAXLINE = 256;
26 RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig);
27 
//! Exception type that carries a textual error message.
/*!
  The message is copied into the exception object, so the pointers returned
  by message() and what() stay valid for the lifetime of that object.

  NOTE(review): the name suggests this is thrown for mol-file features the
  parser does not support; the throw sites are not part of this header.
*/
class MolFileUnhandledFeatureException : public std::exception {
 public:
  //! construct with an error message
  /*!
    \param msg - NUL-terminated message text; a null pointer is treated as an
                 empty message instead of invoking undefined behaviour
  */
  explicit MolFileUnhandledFeatureException(const char *msg)
      : _msg(msg ? msg : "") {}
  //! construct with an error message
  /*!
    \param msg - message text (copied)
  */
  explicit MolFileUnhandledFeatureException(const std::string &msg)
      : _msg(msg) {}
  //! get the error message
  const char *message() const noexcept { return _msg.c_str(); }
  //! get the error message through the standard std::exception interface
  /*!
    Provided so that generic `catch (const std::exception &)` handlers report
    the same text as message().
  */
  const char *what() const noexcept override { return _msg.c_str(); }
  ~MolFileUnhandledFeatureException() noexcept override = default;

 private:
  std::string _msg;  //!< owned copy of the message text
};
42 
43 //-----
44 // mol files
45 //-----
46 typedef std::vector<RWMOL_SPTR> RWMOL_SPTR_VECT;
47 // \brief construct a molecule from MDL mol data in a stream
48 /*!
49  * \param inStream - stream containing the data
50  * \param line - current line number (used for error reporting)
51  * \param sanitize - toggles sanitization and stereochemistry
52  * perception of the molecule
53  * \param removeHs - toggles removal of Hs from the molecule. H removal
54  * is only done if the molecule is sanitized
56  * \param strictParsing - if not set, the parser is more lax about correctness
57  * of the contents.
58  *
59  */
60 RDKIT_FILEPARSERS_EXPORT RWMol *MolDataStreamToMol(std::istream *inStream,
61  unsigned int &line,
62  bool sanitize = true,
63  bool removeHs = true,
64  bool strictParsing = true);
65 // \overload
66 RDKIT_FILEPARSERS_EXPORT RWMol *MolDataStreamToMol(std::istream &inStream,
67  unsigned int &line,
68  bool sanitize = true,
69  bool removeHs = true,
70  bool strictParsing = true);
71 // \brief construct a molecule from an MDL mol block
72 /*!
73  * \param molBlock - string containing the mol block
74  * \param sanitize - toggles sanitization and stereochemistry
75  * perception of the molecule
76  * \param removeHs - toggles removal of Hs from the molecule. H removal
77  * is only done if the molecule is sanitized
78  * \param strictParsing - if not set, the parser is more lax about correctness
79  * of the contents.
80  */
81 RDKIT_FILEPARSERS_EXPORT RWMol *MolBlockToMol(const std::string &molBlock,
82  bool sanitize = true,
83  bool removeHs = true,
84  bool strictParsing = true);
85 
86 // \brief construct a molecule from an MDL mol file
87 /*!
88  * \param fName - string containing the file name
89  * \param sanitize - toggles sanitization and stereochemistry
90  * perception of the molecule
91  * \param removeHs - toggles removal of Hs from the molecule. H removal
92  * is only done if the molecule is sanitized
93  * \param strictParsing - if not set, the parser is more lax about correctness
94  * of the contents.
95  */
96 RDKIT_FILEPARSERS_EXPORT RWMol *MolFileToMol(const std::string &fName,
97  bool sanitize = true,
98  bool removeHs = true,
99  bool strictParsing = true);
100 
101 // \brief generates an MDL mol block for a molecule
102 /*!
103  * \param mol - the molecule in question
104  * \param includeStereo - toggles inclusion of stereochemistry information
105  * \param confId - selects the conformer to be used
106  * \param kekulize - triggers kekulization of the molecule before it is
107  * written
108  * \param forceV3000 - force generation of a V3000 mol block (happens
109  * automatically with
110  * more than 999 atoms or bonds)
111  */
112 RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol,
113  bool includeStereo = true,
114  int confId = -1,
115  bool kekulize = true,
116  bool forceV3000 = false);
117 // \brief Writes a molecule to an MDL mol file
118 /*!
119  * \param mol - the molecule in question
120  * \param fName - the name of the file to use
121  * \param includeStereo - toggles inclusion of stereochemistry information
122  * \param confId - selects the conformer to be used
123  * \param kekulize - triggers kekulization of the molecule before it is
124  * written
125  * \param forceV3000 - force generation of a V3000 mol block (happens
126  * automatically with
127  * more than 999 atoms or bonds)
128  */
130  const ROMol &mol, const std::string &fName, bool includeStereo = true,
131  int confId = -1, bool kekulize = true, bool forceV3000 = false);
132 
133 RDKIT_FILEPARSERS_EXPORT std::string MolToXYZBlock(const ROMol &mol, int confId = -1);
134 
136  const ROMol &mol, const std::string &fName, int confId = -1);
137 
138 //-----
139 // TPL handling:
140 //-----
141 
142 //! \brief translate TPL data (BioCad format) into a multi-conf molecule
143 /*!
144  \param inStream: the stream from which to read
145  \param line: used to track the line number of errors
146  \param sanitize: toggles sanitization and stereochemistry
147  perception of the molecule
148  \param skipFirstConf: according to the TPL format description, the atomic
149  coords in the atom-information block describe the first
150  conformation and the first conf block describes the second
151  conformation. The CombiCode, on the other hand, writes
152  the first conformation data both to the atom-information
153  block and to the first conf block. We want to be able to
154  read CombiCode-style tpls, so we'll allow this
155  mis-feature
156  to be parsed when this flag is set.
157 */
158 RDKIT_FILEPARSERS_EXPORT RWMol *TPLDataStreamToMol(std::istream *inStream,
159  unsigned int &line,
160  bool sanitize = true,
161  bool skipFirstConf = false);
162 
163 //! \brief construct a multi-conf molecule from a TPL (BioCad format) file
164 /*!
165  \param fName: the name of the file from which to read
166  \param sanitize: toggles sanitization and stereochemistry
167  perception of the molecule
168  \param skipFirstConf: according to the TPL format description, the atomic
169  coords in the atom-information block describe the first
170  conformation and the first conf block describes the second
171  conformation. The CombiCode, on the other hand, writes
172  the first conformation data both to the atom-information
173  block and to the first conf block. We want to be able to
174  read CombiCode-style tpls, so we'll allow this
175  mis-feature
176  to be parsed when this flag is set.
177 */
178 RDKIT_FILEPARSERS_EXPORT RWMol *TPLFileToMol(const std::string &fName,
179  bool sanitize = true,
180  bool skipFirstConf = false);
181 
183  const ROMol &mol, const std::string &partialChargeProp = "_GasteigerCharge",
184  bool writeFirstConfTwice = false);
186  const ROMol &mol, const std::string &fName,
187  const std::string &partialChargeProp = "_GasteigerCharge",
188  bool writeFirstConfTwice = false);
189 
190 //-----
191 // MOL2 handling
192 //-----
193 
//! Selects the atom type definitions used when reading Tripos mol2 data
//! (passed as the `variant` argument of the mol2 readers).
enum Mol2Type {
  CORINA = 0  //!< supports output from Corina and some dbtranslate output
};
197 
198 // \brief construct a molecule from a Tripos mol2 file
199 /*!
200  *
201  * \param fName - string containing the file name
202  * \param sanitize - toggles sanitization of the molecule
203  * \param removeHs - toggles removal of Hs from the molecule. H removal
204  * is only done if the molecule is sanitized
205  * \param variant - the atom type definitions to use
206  * \param cleanupSubstructures - toggles recognition and cleanup of common
207  * substructures
208  */
209 RDKIT_FILEPARSERS_EXPORT RWMol *Mol2FileToMol(const std::string &fName,
210  bool sanitize = true,
211  bool removeHs = true,
212  Mol2Type variant = CORINA,
213  bool cleanupSubstructures = true);
214 
215 // \brief construct a molecule from Tripos mol2 data in a stream
216 /*!
217  * \param inStream - stream containing the data
218  * \param sanitize - toggles sanitization of the molecule
219  * \param removeHs - toggles removal of Hs from the molecule. H removal
220  * is only done if the molecule is sanitized
221  * \param variant - the atom type definitions to use
222  * \param cleanupSubstructures - toggles recognition and cleanup of common
223  * substructures
224  */
226  std::istream *inStream, bool sanitize = true, bool removeHs = true,
227  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
228 // \overload
230  std::istream &inStream, bool sanitize = true, bool removeHs = true,
231  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
232 
233 // \brief construct a molecule from a Tripos mol2 block
234 /*!
235  * \param molBlock - string containing the mol block
236  * \param sanitize - toggles sanitization of the molecule
237  * \param removeHs - toggles removal of Hs from the molecule. H removal
238  * is only done if the molecule is sanitized
239  * \param variant - the atom type definitions to use
240  * \param cleanupSubstructures - toggles recognition and cleanup of common
241  * substructures
242  */
244  const std::string &molBlock, bool sanitize = true, bool removeHs = true,
245  Mol2Type variant = CORINA, bool cleanupSubstructures = true);
246 
247 RDKIT_FILEPARSERS_EXPORT RWMol *PDBBlockToMol(const char *str,
248  bool sanitize = true,
249  bool removeHs = true,
250  unsigned int flavor = 0,
251  bool proximityBonding = true);
252 
253 RDKIT_FILEPARSERS_EXPORT RWMol *PDBBlockToMol(const std::string &str,
254  bool sanitize = true,
255  bool removeHs = true,
256  unsigned int flavor = 0,
257  bool proximityBonding = true);
259  std::istream *inStream, bool sanitize = true, bool removeHs = true,
260  unsigned int flavor = 0, bool proximityBonding = true);
262  std::istream &inStream, bool sanitize = true, bool removeHs = true,
263  unsigned int flavor = 0, bool proximityBonding = true);
264 RDKIT_FILEPARSERS_EXPORT RWMol *PDBFileToMol(const std::string &fname,
265  bool sanitize = true,
266  bool removeHs = true,
267  unsigned int flavor = 0,
268  bool proximityBonding = true);
269 
270 // \brief generates a PDB block for a molecule
271 /*!
272  * \param mol - the molecule in question
273  * \param confId - selects the conformer to be used
274  * \param flavor - controls what gets written:
275  * flavor & 1 : Write MODEL/ENDMDL lines around each record
276  * flavor & 2 : Don't write any CONECT records
277  * flavor & 4 : Write CONECT records in both directions
278  * flavor & 8 : Don't use multiple CONECTs to encode bond order
279  * flavor & 16 : Write MASTER record
280  * flavor & 32 : Write TER record
281  */
282 RDKIT_FILEPARSERS_EXPORT std::string MolToPDBBlock(const ROMol &mol,
283  int confId = -1,
284  unsigned int flavor = 0);
285 // \brief Writes a molecule to a PDB file
286 /*!
287  * \param mol - the molecule in question
288  * \param fName - the name of the file to use
289  * \param confId - selects the conformer to be used
290  * \param flavor - controls what gets written:
291  * flavor & 1 : Write MODEL/ENDMDL lines around each record
292  * flavor & 2 : Don't write any CONECT records
293  * flavor & 4 : Write CONECT records in both directions
294  * flavor & 8 : Don't use multiple CONECTs to encode bond order
295  * flavor & 16 : Write MASTER record
296  * flavor & 32 : Write TER record
297  */
298 RDKIT_FILEPARSERS_EXPORT void MolToPDBFile(const ROMol &mol,
299  const std::string &fname,
300  int confId = -1,
301  unsigned int flavor = 0);
302 
303 // \brief reads a molecule from the metadata in an RDKit-generated SVG file
304 /*!
305  * \param svg - string containing the SVG
306  * \param sanitize - toggles sanitization of the molecule
307  * \param removeHs - toggles removal of Hs from the molecule. H removal
308  * is only done if the molecule is sanitized
309  *
310  * **NOTE** This functionality should be considered beta.
311  */
312 RDKIT_FILEPARSERS_EXPORT RWMol *RDKitSVGToMol(const std::string &svg,
313  bool sanitize = true,
314  bool removeHs = true);
315 /*! \overload
316  */
317 RDKIT_FILEPARSERS_EXPORT RWMol *RDKitSVGToMol(std::istream *instream,
318  bool sanitize = true,
319  bool removeHs = true);
320 
321 } // namespace RDKit
322 
323 #endif
RDKit::MolToPDBFile
RDKIT_FILEPARSERS_EXPORT void MolToPDBFile(const ROMol &mol, const std::string &fname, int confId=-1, unsigned int flavor=0)
RDKit::MolToTPLText
RDKIT_FILEPARSERS_EXPORT std::string MolToTPLText(const ROMol &mol, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
RDKit::Mol2Type
Mol2Type
Definition: FileParsers.h:194
RDKit::MOLFILE_MAXLINE
const int MOLFILE_MAXLINE
Definition: FileParsers.h:25
RDKit::MolFileUnhandledFeatureException::MolFileUnhandledFeatureException
MolFileUnhandledFeatureException(const char *msg)
construct with an error message
Definition: FileParsers.h:31
RDKit::MolToTPLFile
RDKIT_FILEPARSERS_EXPORT void MolToTPLFile(const ROMol &mol, const std::string &fName, const std::string &partialChargeProp="_GasteigerCharge", bool writeFirstConfTwice=false)
types.h
RDKit::CORINA
@ CORINA
Definition: FileParsers.h:195
RDKIT_FILEPARSERS_EXPORT
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:216
RDKit::RWMol
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
RDKit::Mol2DataStreamToMol
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2DataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKit::strip
RDKIT_FILEPARSERS_EXPORT std::string strip(const std::string &orig)
RDKit::MolToXYZBlock
RDKIT_FILEPARSERS_EXPORT std::string MolToXYZBlock(const ROMol &mol, int confId=-1)
RDKit::TPLFileToMol
RDKIT_FILEPARSERS_EXPORT RWMol * TPLFileToMol(const std::string &fName, bool sanitize=true, bool skipFirstConf=false)
construct a multi-conf molecule from a TPL (BioCad format) file
RDKit::MolFileUnhandledFeatureException::~MolFileUnhandledFeatureException
~MolFileUnhandledFeatureException() noexcept override
Definition: FileParsers.h:37
RDKit::ROMol
Definition: ROMol.h:171
RDKitBase.h
pulls in the core RDKit functionality
RDKit::MolToXYZFile
RDKIT_FILEPARSERS_EXPORT void MolToXYZFile(const ROMol &mol, const std::string &fName, int confId=-1)
RDKit::MolBlockToMol
RDKIT_FILEPARSERS_EXPORT RWMol * MolBlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKit::MolFileUnhandledFeatureException
Definition: FileParsers.h:28
RDKit::MolToMolFile
RDKIT_FILEPARSERS_EXPORT void MolToMolFile(const ROMol &mol, const std::string &fName, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKit::MolDataStreamToMol
RDKIT_FILEPARSERS_EXPORT RWMol * MolDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKit::TPLDataStreamToMol
RDKIT_FILEPARSERS_EXPORT RWMol * TPLDataStreamToMol(std::istream *inStream, unsigned int &line, bool sanitize=true, bool skipFirstConf=false)
translate TPL data (BioCad format) into a multi-conf molecule
RDKit::Mol2FileToMol
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2FileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKit::RDKitSVGToMol
RDKIT_FILEPARSERS_EXPORT RWMol * RDKitSVGToMol(const std::string &svg, bool sanitize=true, bool removeHs=true)
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::MolFileUnhandledFeatureException::message
const char * message() const
get the error message
Definition: FileParsers.h:36
RDKit::RWMOL_SPTR_VECT
std::vector< RWMOL_SPTR > RWMOL_SPTR_VECT
Definition: FileParsers.h:46
RDKit::PDBBlockToMol
RDKIT_FILEPARSERS_EXPORT RWMol * PDBBlockToMol(const char *str, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKit::MolOps::removeHs
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKit::MolFileUnhandledFeatureException::MolFileUnhandledFeatureException
MolFileUnhandledFeatureException(const std::string msg)
construct with an error message
Definition: FileParsers.h:33
RDKit::MolToMolBlock
RDKIT_FILEPARSERS_EXPORT std::string MolToMolBlock(const ROMol &mol, bool includeStereo=true, int confId=-1, bool kekulize=true, bool forceV3000=false)
RDKit::PDBFileToMol
RDKIT_FILEPARSERS_EXPORT RWMol * PDBFileToMol(const std::string &fname, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKit::Mol2BlockToMol
RDKIT_FILEPARSERS_EXPORT RWMol * Mol2BlockToMol(const std::string &molBlock, bool sanitize=true, bool removeHs=true, Mol2Type variant=CORINA, bool cleanupSubstructures=true)
RDKit::MolFileToMol
RDKIT_FILEPARSERS_EXPORT RWMol * MolFileToMol(const std::string &fName, bool sanitize=true, bool removeHs=true, bool strictParsing=true)
RDKit::PDBDataStreamToMol
RDKIT_FILEPARSERS_EXPORT RWMol * PDBDataStreamToMol(std::istream *inStream, bool sanitize=true, bool removeHs=true, unsigned int flavor=0, bool proximityBonding=true)
RDKit::MolToPDBBlock
RDKIT_FILEPARSERS_EXPORT std::string MolToPDBBlock(const ROMol &mol, int confId=-1, unsigned int flavor=0)
export.h