RDKit
Open-source cheminformatics and machine learning.
SubstructureCache.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #pragma once
12 #include <list>
13 #include <vector>
14 #include <string>
15 #include <stdexcept>
16 #include "../RDKitBase.h"
17 //#include "../Fingerprints/MorganFingerprints.h"
18 #include "Graph.h"
19 #include "Seed.h"
20 #include "DebugTrace.h" // algorithm filter definitions
21 
22 namespace RDKit {
23 namespace FMCS {
25  public:
26 #pragma pack(push, 1)
28  typedef unsigned long long TValue;
30 
31  public:
32  KeyNumericMetrics() : Value(0) {}
33  };
34 #pragma pack(pop)
35 
36  struct HashKey {
38 
39  public:
40  void computeKey(const Seed& seed,
41  const std::vector<unsigned>& queryAtomLabels,
42  const std::vector<unsigned>& queryBondLabels) {
43  computeMorganCodeHash(seed, queryAtomLabels, queryBondLabels);
44  }
45 
46  private:
47  void computeMorganCodeHash(const Seed& seed,
48  const std::vector<unsigned>& queryAtomLabels,
49  const std::vector<unsigned>& queryBondLabels) {
50  size_t nv = seed.getNumAtoms();
51  size_t ne = seed.getNumBonds();
52  std::vector<unsigned long> currCodes(nv);
53  std::vector<unsigned long> prevCodes(nv);
54  size_t nIterations = seed.getNumBonds();
55  if (nIterations > 5) nIterations = 5;
56 
57  for (unsigned seedAtomIdx = 0; seedAtomIdx < seed.getNumAtoms();
58  seedAtomIdx++)
59  currCodes[seedAtomIdx] =
60  queryAtomLabels[seed.MoleculeFragment.AtomsIdx[seedAtomIdx]];
61 
62  for (size_t iter = 0; iter < nIterations; iter++) {
63  for (size_t i = 0; i < nv; i++) prevCodes[i] = currCodes[i];
64 
65  for (size_t seedBondIdx = 0; seedBondIdx < ne; seedBondIdx++) {
66  const Bond* bond = seed.MoleculeFragment.Bonds[seedBondIdx];
67  unsigned order =
68  queryBondLabels[seed.MoleculeFragment.BondsIdx[seedBondIdx]];
69  unsigned atom1 = seed.MoleculeFragment.SeedAtomIdxMap
70  .find(bond->getBeginAtomIdx())
71  ->second;
72  unsigned atom2 =
74  ->second;
75  unsigned v1 = prevCodes[atom1];
76  unsigned v2 = prevCodes[atom2];
77 
78  currCodes[atom1] += v2 * v2 + (v2 + 23) * (order + 1721);
79  currCodes[atom2] += v1 * v1 + (v1 + 23) * (order + 1721);
80  }
81  }
82 
83  KeyNumericMetrics::TValue result = 0;
84  for (unsigned seedAtomIdx = 0; seedAtomIdx < nv; seedAtomIdx++) {
85  unsigned long code = currCodes[seedAtomIdx];
86  result += code * (code + 6849) + 29;
87  }
88 
89  NumericMetrics.Value = result;
90  }
91 
92  // not implemented yet:
93  /*
94  void computeFingerprint(const Seed& seed)
95  {
96  unsigned int radius = seed.getNumBonds();
97  if (radius > 5)
98  radius = 5;
99  ExplicitBitVect *mf =
100  RDKit::MorganFingerprints::getFingerprintAsBitVect(seed.GraphTopology,
101  radius); //SLOW !!!
102  // ...
103  delete mf;
104  NumericMetrics.Field.hasFingerprint = 1;
105  }
106  */
107  };
108 
109  typedef HashKey TKey;
110  typedef std::list<FMCS::Graph> TIndexEntry; // hash-key is not unique key
111  private:
112  std::vector<TIndexEntry> ValueStorage;
113  std::map<KeyNumericMetrics::TValue, size_t> NumericIndex; // TIndexEntry
114  public:
115  // returns computed key, and pointer to index entry with a set of subgraphs
116  // corresponding to the key if found.
117  // then caller must find exactly matched subgraph in the result set with own
118  // search algorithm,
119  // including a resolving of collisions of hash key
120  TIndexEntry* find(const Seed& seed,
121  const std::vector<unsigned>& queryAtomLabels,
122  const std::vector<unsigned>& queryBondLabels,
123  TKey& key) { // compute key and find entry
124  key.computeKey(seed, queryAtomLabels, queryBondLabels);
125  std::map<KeyNumericMetrics::TValue, size_t>::const_iterator entryit =
126  NumericIndex.find(key.NumericMetrics.Value);
127  if (NumericIndex.end() != entryit) return &ValueStorage[entryit->second];
128  return NULL; // not found
129  }
130 
131  // if find() did not found any entry for this key of seed a new entry will be
132  // created
133  void add(const Seed& seed, TKey& key,
134  TIndexEntry* entry) { // "compute" value and store it in NEW entry
135  // if not found
136  if (!entry) {
137  try {
138  ValueStorage.push_back(TIndexEntry());
139  } catch (...) {
140  return; // not enought memory room to add the item, but it's just a
141  // cache
142  }
143  entry = &ValueStorage.back();
144  }
145  entry->push_back(seed.Topology);
146 
147  if (!NumericIndex
148  .insert(std::pair<KeyNumericMetrics::TValue, size_t>(
149  key.NumericMetrics.Value, ValueStorage.size() - 1))
150  .second)
151  return; // not enought memory room to add the item, but it is just cache
152  }
153 
154  size_t keyssize() const { // for statistics only
155  return ValueStorage.size();
156  }
157 
158  size_t fullsize() const { // for statistics only
159  size_t n = 0;
160  for (std::vector<TIndexEntry>::const_iterator e = ValueStorage.begin();
161  e != ValueStorage.end(); e++)
162  n += e->size();
163  return n;
164  }
165 };
166 } // namespace FMCS
167 } // namespace RDKit
RDKit::FMCS::Seed::Topology
Graph Topology
Definition: Seed.h:72
RDKit::FMCS::MolFragment::BondsIdx
std::vector< unsigned > BondsIdx
Definition: Seed.h:29
RDKit::FMCS::MolFragment::AtomsIdx
std::vector< unsigned > AtomsIdx
Definition: Seed.h:28
RDKit::FMCS::SubstructureCache::KeyNumericMetrics::KeyNumericMetrics
KeyNumericMetrics()
Definition: SubstructureCache.h:32
RDKit::FMCS::SubstructureCache::TKey
HashKey TKey
Definition: SubstructureCache.h:109
RDKit::FMCS::MolFragment::SeedAtomIdxMap
std::map< unsigned, unsigned > SeedAtomIdxMap
Definition: Seed.h:30
RDKit::Bond
class for representing a bond
Definition: Bond.h:47
RDKit::FMCS::SubstructureCache::TIndexEntry
std::list< FMCS::Graph > TIndexEntry
Definition: SubstructureCache.h:110
RDKit::FMCS::SubstructureCache::find
TIndexEntry * find(const Seed &seed, const std::vector< unsigned > &queryAtomLabels, const std::vector< unsigned > &queryBondLabels, TKey &key)
Definition: SubstructureCache.h:120
RDKit::FMCS::Seed
Definition: Seed.h:62
RDKit::Bond::getEndAtomIdx
unsigned int getEndAtomIdx() const
returns the index of our end Atom
Definition: Bond.h:185
RDKit::FMCS::SubstructureCache::HashKey
Definition: SubstructureCache.h:36
RDKit::FMCS::SubstructureCache::fullsize
size_t fullsize() const
Definition: SubstructureCache.h:158
RDKit::FMCS::Seed::MoleculeFragment
MolFragment MoleculeFragment
Definition: Seed.h:71
RDKit::FMCS::SubstructureCache::KeyNumericMetrics
Definition: SubstructureCache.h:27
RDKit::FMCS::SubstructureCache::HashKey::computeKey
void computeKey(const Seed &seed, const std::vector< unsigned > &queryAtomLabels, const std::vector< unsigned > &queryBondLabels)
Definition: SubstructureCache.h:40
RDKit::FMCS::SubstructureCache::KeyNumericMetrics::TValue
unsigned long long TValue
Definition: SubstructureCache.h:28
RDKit::FMCS::SubstructureCache::HashKey::NumericMetrics
KeyNumericMetrics NumericMetrics
Definition: SubstructureCache.h:37
Seed.h
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::FMCS::Seed::getNumAtoms
unsigned getNumAtoms() const
Definition: Seed.h:127
RDKit::FMCS::SubstructureCache::add
void add(const Seed &seed, TKey &key, TIndexEntry *entry)
Definition: SubstructureCache.h:133
RDKit::FMCS::SubstructureCache::keyssize
size_t keyssize() const
Definition: SubstructureCache.h:154
RDKit::FMCS::Seed::getNumBonds
unsigned getNumBonds() const
Definition: Seed.h:128
Graph.h
RDKit::FMCS::MolFragment::Bonds
std::vector< const Bond * > Bonds
Definition: Seed.h:27
RDKIT_FMCS_EXPORT
#define RDKIT_FMCS_EXPORT
Definition: export.h:203
RDKit::FMCS::SubstructureCache::KeyNumericMetrics::Value
TValue Value
Definition: SubstructureCache.h:29
DebugTrace.h
RDKit::FMCS::SubstructureCache
Definition: SubstructureCache.h:24
RDKit::Bond::getBeginAtomIdx
unsigned int getBeginAtomIdx() const
returns the index of our begin Atom
Definition: Bond.h:178
export.h