RDKit
Open-source cheminformatics and machine learning.
MolWriters.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2002-2017 Greg Landrum, Rational Discovery LLC
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 
11 #include <RDGeneral/export.h>
12 #ifndef _RD_MOLWRITERS_H_
13 #define _RD_MOLWRITERS_H_
14 
15 #include <RDGeneral/types.h>
16 
17 #include <string>
18 #include <iostream>
19 #include <GraphMol/ROMol.h>
20 
21 namespace RDKit {
22 
23 static int defaultConfId = -1;
25  public:
26  virtual ~MolWriter() {}
27  virtual void write(const ROMol &mol, int confId = defaultConfId) = 0;
28  virtual void flush() = 0;
29  virtual void close() = 0;
30  virtual void setProps(const STR_VECT &propNames) = 0;
31  virtual unsigned int numMols() const = 0;
32 };
33 
34 //! The SmilesWriter is for writing molecules and properties to
35 //! delimited text files.
37  /******************************************************************************
38  * A Smiles Table writer - this is how it is used:
39  * - create a SmilesWriter with an output file name (or an ostream), a
40  * delimiter,
41  * and a list of properties that need to be written out
42  * - then a call is made to the write function for each molecule that needs
43  * to
44  * be written out
45  ******************************************************************************/
46  public:
47  /*!
48  \param fileName : filename to write to ("-" to write to stdout)
49  \param delimiter : delimiter to use in the text file
50  \param nameHeader : used to label the name column in the output. If this
51  is provided as the empty string, no names will be
52  written.
53  \param includeHeader : toggles inclusion of a header line in the output
54  \param isomericSmiles : toggles generation of isomeric SMILES
55  \param kekuleSmiles : toggles the generation of kekule SMILES
56 
57  */
58  SmilesWriter(const std::string &fileName, const std::string &delimiter = " ",
59  const std::string &nameHeader = "Name",
60  bool includeHeader = true, bool isomericSmiles = true,
61  bool kekuleSmiles = false);
62  //! \overload
63  SmilesWriter(std::ostream *outStream, std::string delimiter = " ",
64  std::string nameHeader = "Name", bool includeHeader = true,
65  bool takeOwnership = false, bool isomericSmiles = true,
66  bool kekuleSmiles = false);
67 
68  ~SmilesWriter();
69 
70  //! \brief set a vector of property names that need to be
71  //! written out for each molecule
72  void setProps(const STR_VECT &propNames);
73 
74  //! \brief write a new molecule to the file
75  void write(const ROMol &mol, int confId = defaultConfId);
76 
77  //! \brief flush the ostream
78  void flush() {
79  PRECONDITION(dp_ostream, "no output stream");
80  try {
81  dp_ostream->flush();
82  } catch (...) {
83  try {
84  if (dp_ostream->good()) dp_ostream->setstate(std::ios::badbit);
85  } catch (const std::runtime_error &) {
86  }
87  }
88  };
89 
90  //! \brief close our stream (the writer cannot be used again)
91  void close() {
92  flush();
93  if (df_owner) {
94  delete dp_ostream;
95  df_owner = false;
96  }
97  dp_ostream = nullptr;
98  };
99 
100  //! \brief get the number of molecules written so far
101  unsigned int numMols() const { return d_molid; };
102 
103  private:
104  // local initialization
105  void init(const std::string &delimiter, const std::string &nameHeader,
106  bool includeHeader, bool isomericSmiles, bool kekuleSmiles);
107 
108  // dumps a header line to the output stream
109  void dumpHeader() const;
110 
111  std::ostream *dp_ostream;
112  bool df_owner;
113  bool df_includeHeader; // whether or not to include a title line
114  unsigned int d_molid; // the number of the molecules we wrote so far
115  std::string d_delim; // delimiter string between various records
116  std::string d_nameHeader; // header for the name column in the output file
117  STR_VECT d_props; // list of property name that need to be written out
118  bool df_isomericSmiles; // whether or not to do isomeric smiles
119  bool df_kekuleSmiles; // whether or not to do kekule smiles
120 };
121 
122 //! The SDWriter is for writing molecules and properties to
123 //! SD files
125  /**************************************************************************************
126  * An SD file (or stream) writer - this is how it is used:
127  * - create an SDWriter with an output file name (or an ostream),
128  * and a list of properties that need to be written out
129  * - then a call is made to the write function for each molecule that needs
130  * to be written out
131  **************************************************************************************/
132  public:
133  /*!
134  \param fileName : filename to write to ("-" to write to stdout)
135  */
136  SDWriter(const std::string &fileName);
137  SDWriter(std::ostream *outStream, bool takeOwnership = false);
138 
139  ~SDWriter();
140 
141  //! \brief set a vector of property names that need to be
142  //! written out for each molecule
143  void setProps(const STR_VECT &propNames);
144 
145  //! \brief return the text that would be written to the file
146  static std::string getText(const ROMol &mol, int confId = defaultConfId,
147  bool kekulize = true, bool force_V3000 = false,
148  int molid = -1, STR_VECT *propNames = NULL);
149 
150  //! \brief write a new molecule to the file
151  void write(const ROMol &mol, int confId = defaultConfId);
152 
153  //! \brief flush the ostream
154  void flush() {
155  PRECONDITION(dp_ostream, "no output stream");
156  try {
157  dp_ostream->flush();
158  } catch (...) {
159  try {
160  if (dp_ostream->good()) dp_ostream->setstate(std::ios::badbit);
161  } catch (const std::runtime_error &) {
162  }
163  }
164  };
165 
166  //! \brief close our stream (the writer cannot be used again)
167  void close() {
168  flush();
169  if (df_owner) {
170  delete dp_ostream;
171  df_owner = false;
172  }
173  dp_ostream = nullptr;
174  };
175 
176  //! \brief get the number of molecules written so far
177  unsigned int numMols() const { return d_molid; };
178 
179  void setForceV3000(bool val) { df_forceV3000 = val; };
180  bool getForceV3000() const { return df_forceV3000; };
181 
182  void setKekulize(bool val) { df_kekulize = val; };
183  bool getKekulize() const { return df_kekulize; };
184 
185  private:
186  void writeProperty(const ROMol &mol, const std::string &name);
187 
188  std::ostream *dp_ostream;
189  bool df_owner;
190  unsigned int d_molid; // the number of the molecules we wrote so far
191  STR_VECT d_props; // list of property name that need to be written out
192  bool df_forceV3000; // force writing the mol blocks as V3000
193  bool df_kekulize; // toggle kekulization of molecules on writing
194 };
195 
196 //! The TDTWriter is for writing molecules and properties to
197 //! TDT files
199  /**************************************************************************************
200  * A TDT file (or stream) writer - this is how it is used:
201  * - create a TDTWriter with an output file name (or an ostream),
202  * and a list of properties that need to be written out
203  * - then a call is made to the write function for each molecule that needs
204  * to be written out
205  **************************************************************************************/
206  public:
207  /*!
208  \param fileName : filename to write to ("-" to write to stdout)
209  */
210  TDTWriter(const std::string &fileName);
211  TDTWriter(std::ostream *outStream, bool takeOwnership = false);
212 
213  ~TDTWriter();
214 
215  //! \brief set a vector of property names that need to be
216  //! written out for each molecule
217  void setProps(const STR_VECT &propNames);
218 
219  //! \brief write a new molecule to the file
220  void write(const ROMol &mol, int confId = defaultConfId);
221 
222  //! \brief flush the ostream
223  void flush() {
224  PRECONDITION(dp_ostream, "no output stream");
225  try {
226  dp_ostream->flush();
227  } catch (...) {
228  try {
229  if (dp_ostream->good()) dp_ostream->setstate(std::ios::badbit);
230  } catch (const std::runtime_error &) {
231  }
232  }
233  };
234 
235  //! \brief close our stream (the writer cannot be used again)
236  void close() {
237  // if we've written any mols, finish with a "|" line
238  if (dp_ostream && d_molid > 0) {
239  *dp_ostream << "|\n";
240  }
241  flush();
242  if (df_owner) {
243  delete dp_ostream;
244  df_owner = false;
245  }
246  dp_ostream = nullptr;
247  };
248 
249  //! \brief get the number of molecules written so far
250  unsigned int numMols() const { return d_molid; };
251 
252  void setWrite2D(bool state = true) { df_write2D = state; };
253  bool getWrite2D() const { return df_write2D; };
254 
255  void setWriteNames(bool state = true) { df_writeNames = state; };
256  bool getWriteNames() const { return df_writeNames; };
257 
258  void setNumDigits(unsigned int numDigits) { d_numDigits = numDigits; };
259  unsigned int getNumDigits() const { return d_numDigits; };
260 
261  private:
262  void writeProperty(const ROMol &mol, const std::string &name);
263 
264  std::ostream *dp_ostream;
265  bool df_owner;
266  unsigned int d_molid; // the number of molecules we wrote so far
267  STR_VECT d_props; // list of property name that need to be written out
268  bool df_write2D; // write 2D coordinates instead of 3D
269  bool df_writeNames; // write a name record for each molecule
270  unsigned int
271  d_numDigits; // number of digits to use in our output of coordinates;
272 };
273 
274 //! The PDBWriter is for writing molecules to Brookhaven Protein
275 //! DataBank format files.
277  public:
278  PDBWriter(const std::string &fileName, unsigned int flavor = 0);
279  PDBWriter(std::ostream *outStream, bool takeOwnership = false,
280  unsigned int flavor = 0);
281  ~PDBWriter();
282 
283  //! \brief write a new molecule to the file
284  void write(const ROMol &mol, int confId = defaultConfId);
285 
286  void setProps(const STR_VECT &){};
287 
288  //! \brief flush the ostream
289  void flush() {
290  PRECONDITION(dp_ostream, "no output stream");
291  try {
292  dp_ostream->flush();
293  } catch (...) {
294  try {
295  if (dp_ostream->good()) dp_ostream->setstate(std::ios::badbit);
296  } catch (const std::runtime_error &) {
297  }
298  }
299  };
300 
301  //! \brief close our stream (the writer cannot be used again)
302  void close() {
303  flush();
304  if (df_owner) {
305  delete dp_ostream;
306  df_owner = false;
307  }
308  dp_ostream = nullptr;
309  };
310 
311  //! \brief get the number of molecules written so far
312  unsigned int numMols() const { return d_count; };
313 
314  private:
315  std::ostream *dp_ostream;
316  unsigned int d_flavor;
317  unsigned int d_count;
318  bool df_owner;
319 };
320 } // namespace RDKit
321 
322 #endif
RDKit::TDTWriter::setWriteNames
void setWriteNames(bool state=true)
Definition: MolWriters.h:255
ROMol.h
Defines the primary molecule class ROMol as well as associated typedefs.
types.h
RDKit::PDBWriter
Definition: MolWriters.h:276
RDKit::SDWriter
Definition: MolWriters.h:124
RDKit::SmilesWriter::flush
void flush()
flush the ostream
Definition: MolWriters.h:78
RDKit::TDTWriter::numMols
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:250
RDKIT_FILEPARSERS_EXPORT
#define RDKIT_FILEPARSERS_EXPORT
Definition: export.h:216
RDKit::MolWriter
Definition: MolWriters.h:24
RDKit::SDWriter::close
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:167
RDKit::SDWriter::numMols
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:177
RDKit::SmilesWriter::numMols
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:101
RDKit::TDTWriter::getWrite2D
bool getWrite2D() const
Definition: MolWriters.h:253
RDKit::STR_VECT
std::vector< std::string > STR_VECT
Definition: Dict.h:29
RDKit::TDTWriter::getWriteNames
bool getWriteNames() const
Definition: MolWriters.h:256
RDKit::TDTWriter::close
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:236
RDKit::SDWriter::flush
void flush()
flush the ostream
Definition: MolWriters.h:154
RDKit::ROMol
Definition: ROMol.h:171
RDKit::SmilesWriter
Definition: MolWriters.h:36
RDKit::SDWriter::getKekulize
bool getKekulize() const
Definition: MolWriters.h:183
RDKit::SDWriter::getForceV3000
bool getForceV3000() const
Definition: MolWriters.h:180
RDKit::PDBWriter::numMols
unsigned int numMols() const
get the number of molecules written so far
Definition: MolWriters.h:312
RDKit::PDBWriter::setProps
void setProps(const STR_VECT &)
Definition: MolWriters.h:286
RDKit::SDWriter::setForceV3000
void setForceV3000(bool val)
Definition: MolWriters.h:179
RDKit::SmilesWriter::close
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:91
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::defaultConfId
static int defaultConfId
Definition: MolWriters.h:23
RDKit::PDBWriter::close
void close()
close our stream (the writer cannot be used again)
Definition: MolWriters.h:302
PRECONDITION
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
RDKit::SDWriter::setKekulize
void setKekulize(bool val)
Definition: MolWriters.h:182
RDKit::TDTWriter::setNumDigits
void setNumDigits(unsigned int numDigits)
Definition: MolWriters.h:258
RDKit::TDTWriter
Definition: MolWriters.h:198
RDKit::TDTWriter::setWrite2D
void setWrite2D(bool state=true)
Definition: MolWriters.h:252
RDKit::TDTWriter::flush
void flush()
flush the ostream
Definition: MolWriters.h:223
RDKit::PDBWriter::flush
void flush()
flush the ostream
Definition: MolWriters.h:289
RDKit::MolWriter::~MolWriter
virtual ~MolWriter()
Definition: MolWriters.h:26
RDKit::TDTWriter::getNumDigits
unsigned int getNumDigits() const
Definition: MolWriters.h:259
export.h