RDKit
Open-source cheminformatics and machine learning.
DuplicatedSeedCache.h
Go to the documentation of this file.
1 //
2 // Copyright (C) 2014 Novartis Institutes for BioMedical Research
3 //
4 // @@ All Rights Reserved @@
5 // This file is part of the RDKit.
6 // The contents are covered by the terms of the BSD license
7 // which is included in the file license.txt, found at the root
8 // of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #pragma once
12 #include <map>
13 #include <vector>
14 #include <stdexcept>
15 #include <algorithm>
16 
17 namespace RDKit {
18 namespace FMCS {
20  public:
21  typedef bool TValue;
/// Cache key: two index lists (atoms and bonds), each kept in ascending
/// order so that the same set of indices always yields the same key
/// regardless of the order in which the indices were added.
class TKey {
  std::vector<unsigned> AtomIdx;  // ascending; maintained by addAtom()
  std::vector<unsigned> BondIdx;  // ascending; maintained by addBond()

 public:
  /// Number of atom indices stored in the key.
  size_t getNumAtoms() const { return AtomIdx.size(); }
  /// Number of bond indices stored in the key.
  size_t getNumBonds() const { return BondIdx.size(); }

  /// Inserts atom index \c i at its sorted position.
  /// An index that is already present is stored again (no de-duplication).
  void addAtom(unsigned i) {
    std::vector<unsigned>::iterator it =
        std::lower_bound(AtomIdx.begin(), AtomIdx.end(), i);
    AtomIdx.insert(it, i);
  }

  /// Inserts bond index \c i at its sorted position.
  /// An index that is already present is stored again (no de-duplication).
  void addBond(unsigned i) {
    std::vector<unsigned>::iterator it =
        std::lower_bound(BondIdx.begin(), BondIdx.end(), i);
    BondIdx.insert(it, i);
  }

  /// Two keys are equal when both index lists match element by element.
  /// Uses the vector comparison instead of memcmp(&v[0], ...): indexing
  /// element 0 of an empty vector is undefined behaviour, and empty lists
  /// are legal here (e.g. a key with atoms but no bonds).
  bool operator==(const TKey& right) const {
    return AtomIdx == right.AtomIdx && BondIdx == right.BondIdx;
  }

  /// Strict weak ordering used by the std::map index.
  /// Keys are ordered first by atom count, then by bond count; keys of
  /// identical shape are ordered lexicographically by atom indices and
  /// then by bond indices. The element-wise comparison is well defined for
  /// empty lists and does not depend on the byte order of the platform.
  bool operator<(const TKey& right) const {
    if (AtomIdx.size() != right.AtomIdx.size())
      return AtomIdx.size() < right.AtomIdx.size();
    if (BondIdx.size() != right.BondIdx.size())
      return BondIdx.size() < right.BondIdx.size();

    // same shape -> compare the contents
    if (AtomIdx != right.AtomIdx) return AtomIdx < right.AtomIdx;
    return BondIdx < right.BondIdx;
  }
};
66 
67  private:
68  std::map<TKey, TValue> Index;
69  size_t MaxAtoms; // max key in the cache for fast failed find
70  public:
71  DuplicatedSeedCache() : MaxAtoms(0) {}
72  void clear() {
73  Index.clear();
74  MaxAtoms = 0;
75  }
76 
77  bool find(const TKey& key, TValue& value) const {
78  value = false;
79  if (key.getNumAtoms() > MaxAtoms)
80  return false; // fast check if key greater then max key in the cache
81 
82  std::map<TKey, TValue>::const_iterator entryit = Index.find(key);
83  if (Index.end() != entryit) value = entryit->second;
84  return Index.end() != entryit;
85  }
86 
87  void add(const TKey& key, TValue found = true) {
88  if (key.getNumAtoms() > MaxAtoms) MaxAtoms = key.getNumAtoms();
89 
90  Index.insert(std::pair<TKey, bool>(key, found));
91  }
92 
93  size_t size() const {
94  return Index.size(); // for statistics only
95  }
96 };
97 } // namespace FMCS
98 } // namespace RDKit
RDKit::FMCS::DuplicatedSeedCache::TKey::operator==
bool operator==(const TKey &right) const
Definition: DuplicatedSeedCache.h:40
RDKit::FMCS::DuplicatedSeedCache::TKey::getNumAtoms
size_t getNumAtoms() const
Definition: DuplicatedSeedCache.h:26
RDKit::FMCS::DuplicatedSeedCache::TKey::operator<
bool operator<(const TKey &right) const
Definition: DuplicatedSeedCache.h:49
RDKit::FMCS::DuplicatedSeedCache::TKey::addAtom
void addAtom(unsigned i)
Definition: DuplicatedSeedCache.h:29
RDKit::FMCS::DuplicatedSeedCache::TKey::addBond
void addBond(unsigned i)
Definition: DuplicatedSeedCache.h:34
RDKit::FMCS::DuplicatedSeedCache::find
bool find(const TKey &key, TValue &value) const
Definition: DuplicatedSeedCache.h:77
RDKit::FMCS::DuplicatedSeedCache::TKey
Definition: DuplicatedSeedCache.h:22
RDKit::FMCS::DuplicatedSeedCache
Definition: DuplicatedSeedCache.h:19
RDKit::FMCS::DuplicatedSeedCache::size
size_t size() const
Definition: DuplicatedSeedCache.h:93
RDKit::FMCS::DuplicatedSeedCache::add
void add(const TKey &key, TValue found=true)
Definition: DuplicatedSeedCache.h:87
RDKit::FMCS::DuplicatedSeedCache::DuplicatedSeedCache
DuplicatedSeedCache()
Definition: DuplicatedSeedCache.h:71
RDKit::FMCS::DuplicatedSeedCache::clear
void clear()
Definition: DuplicatedSeedCache.h:72
RDKit
Std stuff.
Definition: Atom.h:30
RDKit::FMCS::DuplicatedSeedCache::TValue
bool TValue
Definition: DuplicatedSeedCache.h:21
RDKit::FMCS::DuplicatedSeedCache::TKey::getNumBonds
size_t getNumBonds() const
Definition: DuplicatedSeedCache.h:27
export.h