RDKit
Open-source cheminformatics and machine learning.
SLNParseOps.h
Go to the documentation of this file.
1 //
2 // Copyright (c) 2008, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior
18 // written permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 // Created by Greg Landrum, September 2006
33 //
34 #include <RDGeneral/export.h>
35 #ifndef __RD_SLNPARSEOPS_H__
36 #define __RD_SLNPARSEOPS_H__
37 
38 #include <vector>
41 #include <GraphMol/RDKitBase.h>
42 #include <GraphMol/RDKitQueries.h>
44 #include <boost/lexical_cast.hpp>
46 
47 namespace RDKit {
48 namespace SLNParse {
49 namespace {
50 //! set a bookmark in the molecule if the atom has an associated ID:
51 void bookmarkAtomID(RWMol *mp, Atom *atom) {
52  PRECONDITION(mp, "bad molecule");
53  PRECONDITION(atom, "bad atom");
54  unsigned int label;
55  if (atom->getPropIfPresent(common_properties::_AtomID, label)) {
56  if (mp->hasAtomBookmark(label)) {
57  std::stringstream err;
58  err << "SLN Parser error: Atom ID " << label << " used a second time.";
59  throw SLNParseException(err.str());
60  }
61  if (mp->hasBondBookmark(label)) {
62  std::stringstream err;
63  err << "SLN Parser error: Atom ID " << label
64  << " appears *after* its ring closure.";
65  throw SLNParseException(err.str());
66  }
67  mp->setAtomBookmark(atom, label);
68  }
69 }
70 
71 //! adds a bond, being careful to handle aromaticity properly
72 template <typename BondType>
73 void addBondToMol(RWMol *mp, BondType *bond) {
74  PRECONDITION(mp, "null molecule");
75  PRECONDITION(bond, "null bond");
76  mp->addBond(bond, true);
77  if (bond->getBondType() == Bond::AROMATIC) {
78  // SLN doesn't have aromatic atom types, aromaticity is a property
79  // of the bonds themselves, so we need to set the atom types:
80  bond->setIsAromatic(true);
81  bond->getBeginAtom()->setIsAromatic(true);
82  bond->getEndAtom()->setIsAromatic(true);
83  }
84 }
85 } // end of anonymous namespace
86 
87 // ------------------------------------------------------------------------------------
88 //! initialize a molecule
89 template <typename AtomType>
90 int startMol(std::vector<RWMol *> &molList, AtomType *firstAtom,
91  bool doingQuery) {
92  PRECONDITION(firstAtom, "empty atom");
93  RWMol *mp = new RWMol();
94  mp->addAtom(firstAtom, true, true);
95  bookmarkAtomID(mp, firstAtom);
96 
97  if (!doingQuery) {
98  // add any hydrogens that are set on the atom, otherwise getting the
99  // numbering right
100  // is just too hard:
101  for (unsigned int i = 0; i < firstAtom->getNumExplicitHs(); ++i) {
102  int hIdx = mp->addAtom(new Atom(1), false, true);
103  mp->addBond(0, hIdx, Bond::SINGLE);
104  }
105  firstAtom->setNumExplicitHs(0);
106  }
107 
108  int sz = molList.size();
109  molList.push_back(mp);
110  return sz;
111 };
112 
113 // ------------------------------------------------------------------------------------
114 //! adds an atom to a molecule
115 template <typename AtomType, typename BondType>
116 void addAtomToMol(std::vector<RWMol *> &molList, unsigned int idx,
117  AtomType *atom, BondType *bond, bool doingQuery) {
118  PRECONDITION(idx < molList.size(), "bad index");
119  RWMol *mp = molList[idx];
120  PRECONDITION(mp, "null molecule");
121  PRECONDITION(atom, "empty atom");
122  PRECONDITION(bond, "null bond");
123 
124  Atom *a1 = mp->getActiveAtom();
125  int atomIdx1 = a1->getIdx();
126  int atomIdx2 = mp->addAtom(atom, true, true);
127  bookmarkAtomID(mp, atom);
128  bond->setOwningMol(mp);
129  bond->setBeginAtomIdx(atomIdx1);
130  bond->setEndAtomIdx(atomIdx2);
131  addBondToMol(mp, bond);
132 
133  if (!doingQuery) {
134  // add any hydrogens that are set on the atom, otherwise getting the
135  // numbering right
136  // is just too hard:
137  for (unsigned int i = 0; i < atom->getNumExplicitHs(); ++i) {
138  int hIdx = mp->addAtom(new Atom(1), false, true);
139  mp->addBond(atomIdx2, hIdx, Bond::SINGLE);
140  }
141  atom->setNumExplicitHs(0);
142  }
143 }
144 //! \overload
145 template <typename AtomType>
146 void addAtomToMol(std::vector<RWMol *> &molList, unsigned int idx,
147  AtomType *atom, bool doingQuery) {
148  addAtomToMol(molList, idx, atom, new Bond(Bond::SINGLE), doingQuery);
149 }
150 
151 // ------------------------------------------------------------------------------------
152 //! closes an indexed ring in a molecule using the bond provided
153 // The bond is formed from the atom in the molecule with the
154 // corresponding bookmark to the active atom
155 //
156 template <typename BondType>
157 void closeRingBond(std::vector<RWMol *> &molList, unsigned int molIdx,
158  unsigned int ringIdx, BondType *bond,
159  bool postponeAllowed = true) {
160  PRECONDITION(molIdx < molList.size(), "bad index");
161  RWMol *mp = molList[molIdx];
162  PRECONDITION(mp, "null molecule");
163  PRECONDITION(bond, "Null bond");
164 
165  if (!mp->hasAtomBookmark(ringIdx)) {
166  if (postponeAllowed) {
167  // save it for later:
168  bond->setOwningMol(mp);
169  bond->setEndAtomIdx(mp->getActiveAtom()->getIdx());
170  mp->setBondBookmark(bond, ringIdx);
171  return;
172  } else {
173  std::stringstream err;
174  err << "SLN Parser error: Ring closure " << ringIdx
175  << " does not have a corresponding opener.";
176  throw SLNParseException(err.str());
177  }
178  }
179  Atom *opener = mp->getAtomWithBookmark(ringIdx);
180  CHECK_INVARIANT(opener, "invalid atom");
181 
182  Atom *closer = mp->getActiveAtom();
183  bond->setOwningMol(mp);
184  bond->setBeginAtom(opener);
185  bond->setEndAtom(closer);
186  addBondToMol(mp, bond);
187 };
188 //! \overload
189 void closeRingBond(std::vector<RWMol *> &molList, unsigned int molIdx,
190  unsigned int ringIdx) {
191  auto *newBond = new Bond(Bond::SINGLE);
192  try {
193  closeRingBond(molList, molIdx, ringIdx, newBond);
194  } catch (...) {
195  delete newBond;
196  throw;
197  }
198 };
199 
200 // ------------------------------------------------------------------------------------
201 // NOTE: this takes over responsibility for the bond
202 template <typename BondType>
203 int addBranchToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
204  unsigned int branchIdx, BondType *&bond) {
205  PRECONDITION(molIdx < molList.size(), "bad index");
206  RWMol *mp = molList[molIdx];
207  PRECONDITION(mp, "null molecule");
208  PRECONDITION(branchIdx < molList.size(), "bad index");
209  RWMol *branch = molList[branchIdx];
210  PRECONDITION(branch, "null branch");
211  PRECONDITION(bond, "null bond");
212 
213  unsigned int activeAtomIdx = mp->getActiveAtom()->getIdx();
214  unsigned int nOrigAtoms = mp->getNumAtoms();
215 
216  //
217  // Add the fragment's atoms and bonds to the molecule:
218  //
219  mp->insertMol(*branch);
220 
221  // copy in any atom bookmarks from the branch:
222  for (ROMol::ATOM_BOOKMARK_MAP::const_iterator bmIt =
223  branch->getAtomBookmarks()->begin();
224  bmIt != branch->getAtomBookmarks()->end(); ++bmIt) {
225  if (bmIt->first < 0) continue;
226  if (mp->hasAtomBookmark(bmIt->first)) {
227  std::stringstream err;
228  err << "SLN Parser error: Atom ID " << bmIt->first
229  << " used a second time.";
230  throw SLNParseException(err.str());
231  } else if (mp->hasBondBookmark(bmIt->first)) {
232  std::stringstream err;
233  err << "SLN Parser error: Atom ID " << bmIt->first
234  << " appears *after* its ring closure.";
235  throw SLNParseException(err.str());
236  } else {
237  CHECK_INVARIANT(bmIt->second.size() == 1,
238  "bad atom bookmark list on branch");
239  Atom *tgtAtom =
240  mp->getAtomWithIdx((*bmIt->second.begin())->getIdx() + nOrigAtoms);
241  mp->setAtomBookmark(tgtAtom, bmIt->first);
242  }
243  }
244 
245  // loop over bond bookmarks in the branch and close the corresponding rings
246  for (ROMol::BOND_BOOKMARK_MAP::const_iterator bmIt =
247  branch->getBondBookmarks()->begin();
248  bmIt != branch->getBondBookmarks()->end(); ++bmIt) {
249  CHECK_INVARIANT(bmIt->second.size() >= 1,
250  "bad bond bookmark list on branch");
251  for (ROMol::BOND_PTR_LIST::const_iterator bondIt = bmIt->second.begin();
252  bondIt != bmIt->second.end(); ++bondIt) {
253  Bond *tgtBond = *bondIt;
254  if (bmIt->first > 0 && mp->hasAtomBookmark(bmIt->first)) {
255  Atom *tmpAtom = mp->getActiveAtom();
256  mp->setActiveAtom(
257  mp->getAtomWithIdx(tgtBond->getEndAtomIdx() + nOrigAtoms));
258  closeRingBond(molList, molIdx, bmIt->first, tgtBond, false);
259  mp->setActiveAtom(tmpAtom);
260  } else {
261  // no partner found yet, copy into this mol:
262  tgtBond->setOwningMol(mp);
263  tgtBond->setEndAtomIdx(tgtBond->getEndAtomIdx() + nOrigAtoms);
264  mp->setBondBookmark(tgtBond, bmIt->first);
265  }
266  }
267  }
268 
269  // set the connecting bond:
270  if (bond->getBondType() != Bond::IONIC) {
271  bond->setOwningMol(mp);
272  bond->setBeginAtomIdx(activeAtomIdx);
273  bond->setEndAtomIdx(nOrigAtoms);
274  addBondToMol(mp, bond);
275  } else {
276  delete bond;
277  }
278  bond = 0;
279 
280  delete branch;
281  unsigned int sz = molList.size();
282  if (sz == branchIdx + 1) {
283  molList.resize(sz - 1);
284  }
285  return molIdx;
286 };
287 //! \overload
288 int addBranchToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
289  unsigned int branchIdx) {
290  Bond *newBond = new Bond(Bond::SINGLE);
291  int ret = -1;
292  try {
293  ret = addBranchToMol(molList, molIdx, branchIdx, newBond);
294  } catch (...) {
295  delete newBond;
296  throw;
297  }
298  return ret;
299 };
300 
301 // ------------------------------------------------------------------------------------
302 //! adds the atoms and bonds from a fragment to the molecule, sets no bond
303 // between them
304 int addFragToMol(std::vector<RWMol *> &molList, unsigned int molIdx,
305  unsigned int fragIdx) {
306  Bond *newBond = new Bond(Bond::IONIC);
307  int ret = -1;
308  try {
309  ret = addBranchToMol(molList, molIdx, fragIdx, newBond);
310  } catch (...) {
311  delete newBond;
312  throw;
313  }
314  return ret;
315 }
316 
317 //! convenience function to convert the argument to a string
318 template <typename T>
319 std::string convertToString(T val) {
320  std::string res = boost::lexical_cast<std::string>(val);
321  return res;
322 }
323 
324 } // end of namespace SLNParse
325 } // end of namespace RDKit
326 #endif
RDKit::common_properties::_AtomID
const RDKIT_RDGENERAL_EXPORT std::string _AtomID
RDKit::ROMol::getAtomBookmarks
ATOM_BOOKMARK_MAP * getAtomBookmarks()
returns a pointer to all of our atom bookmarks
Definition: ROMol.h:387
RDKit::ROMol::setBondBookmark
void setBondBookmark(Bond *bond, int mark)
associates a Bond pointer with a bookmark
Definition: ROMol.h:390
RDKitQueries.h
pulls in the RDKit Query functionality
RDKit::RWMol::setActiveAtom
void setActiveAtom(Atom *atom)
sets our activeAtom
RDKit::ROMol::getAtomWithBookmark
Atom * getAtomWithBookmark(int mark)
returns the first Atom associated with the bookmark provided
BoostStartInclude.h
RDKit::ROMol::hasBondBookmark
bool hasBondBookmark(int mark) const
queries whether or not any bonds are associated with a bookmark
Definition: ROMol.h:408
RDKit::Bond
class for representing a bond
Definition: Bond.h:47
RDKit::RWMol
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:31
RDKit::Bond::SINGLE
@ SINGLE
Definition: Bond.h:58
CHECK_INVARIANT
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:101
RDKit::RWMol::insertMol
void insertMol(const ROMol &other)
insert the atoms and bonds from other into this molecule
RDKit::Atom::getIdx
unsigned int getIdx() const
returns our index within the ROMol
Definition: Atom.h:133
RDKit::Bond::IONIC
@ IONIC
Definition: Bond.h:70
RDKit::SLNParse::startMol
int startMol(std::vector< RWMol * > &molList, AtomType *firstAtom, bool doingQuery)
initialize a molecule
Definition: SLNParseOps.h:90
RDKit::RWMol::addAtom
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
RDKit::Atom
The class for representing atoms.
Definition: Atom.h:69
BoostEndInclude.h
RDKit::Bond::getEndAtomIdx
unsigned int getEndAtomIdx() const
returns the index of our end Atom
Definition: Bond.h:185
RDKit::ROMol::getAtomWithIdx
Atom * getAtomWithIdx(unsigned int idx)
returns a pointer to a particular Atom
RDKitBase.h
pulls in the core RDKit functionality
RDKit::SLNParse::addBranchToMol
int addBranchToMol(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int branchIdx, BondType *&bond)
Definition: SLNParseOps.h:203
RDKit::Bond::setEndAtomIdx
void setEndAtomIdx(unsigned int what)
sets the index of our end Atom
RDKit::RWMol::addBond
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms
RDKit::SLNParse::addAtomToMol
void addAtomToMol(std::vector< RWMol * > &molList, unsigned int idx, AtomType *atom, BondType *bond, bool doingQuery)
adds an atom to a molecule
Definition: SLNParseOps.h:116
SLNAttribs.h
RDKit::RWMol::getActiveAtom
Atom * getActiveAtom()
returns a pointer to the "active" Atom
RDKit::ROMol::getNumAtoms
unsigned int getNumAtoms(bool onlyExplicit=1) const
returns our number of atoms
RDKit
Std stuff.
Definition: Atom.h:30
PRECONDITION
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
RDKit::ROMol::setAtomBookmark
void setAtomBookmark(Atom *at, int mark)
associates an Atom pointer with a bookmark
Definition: ROMol.h:362
RDKit::SLNParseException
Definition: SLNParse.h:57
RDKit::SLNParse::addFragToMol
int addFragToMol(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int fragIdx)
adds the atoms and bonds from a fragment to the molecule, sets no bond
Definition: SLNParseOps.h:304
RDKit::Bond::setOwningMol
void setOwningMol(ROMol *other)
sets our owning molecule
RDKit::Atom::setOwningMol
void setOwningMol(ROMol *other)
sets our owning molecule
RDKit::ROMol::getBondBookmarks
BOND_BOOKMARK_MAP * getBondBookmarks()
returns a pointer to all of our bond bookmarks
Definition: ROMol.h:410
RDKit::SLNParse::convertToString
std::string convertToString(T val)
convenience function to convert the argument to a string
Definition: SLNParseOps.h:319
SLNParse.h
RDKit::Bond::AROMATIC
@ AROMATIC
Definition: Bond.h:69
RDKit::ROMol::hasAtomBookmark
bool hasAtomBookmark(int mark) const
queries whether or not any atoms are associated with a bookmark
Definition: ROMol.h:385
RDKit::SLNParse::closeRingBond
void closeRingBond(std::vector< RWMol * > &molList, unsigned int molIdx, unsigned int ringIdx, BondType *bond, bool postponeAllowed=true)
closes an indexed ring in a molecule using the bond provided
Definition: SLNParseOps.h:157
export.h