RDKit
Open-source cheminformatics and machine learning.
HierarchicalClusterPicker.h
Go to the documentation of this file.
1
//
2
// Copyright (C) 2003-2006 Rational Discovery LLC
3
//
4
// @@ All Rights Reserved @@
5
// This file is part of the RDKit.
6
// The contents are covered by the terms of the BSD license
7
// which is included in the file license.txt, found at the root
8
// of the RDKit source tree.
9
//
10
#include <
RDGeneral/export.h
>
11
#ifndef _HIERARCHCLUSTERPICKER_H
12
#define _HIERARCHCLUSTERPICKER_H
13
14
#include <
RDGeneral/types.h
>
15
#include "
DistPicker.h
"
16
17
namespace
RDPickers
{
18
19
/*! \brief Diversity picker based on hierarchical clustering
20
*
21
* This class inherits from DistPicker since it uses the distance matrix
22
* for diversity picking. The clustering itself is done using the Murtagh
23
* code in $RDBASE/Code/ML/Cluster/Mutagh/
24
*/
25
class
RDKIT_SIMDIVPICKERS_EXPORT
HierarchicalClusterPicker
:
public
DistPicker
{
26
public
:
27
/*! \brief The type of hierarchical clustering algorithm to use
28
*/
29
typedef
enum
{
30
WARD = 1,
31
SLINK = 2,
32
CLINK = 3,
33
UPGMA = 4,
34
MCQUITTY = 5,
35
GOWER = 6,
36
CENTROID = 7
37
} ClusterMethod;
38
39
/*! \brief Constructor - takes a ClusterMethod as an argument
40
*
41
* Sets the hierarch clustering method
42
*/
43
explicit
HierarchicalClusterPicker
(
ClusterMethod
clusterMethod)
44
: d_method(clusterMethod) {
45
;
46
};
47
48
/*! \brief This is the function that does the picking
49
*
50
* Here is how the algorithm works \n
51
* FIX: Supply reference
52
*
53
* - The entire pool is clustered using the distance matrix using one of the
54
* hierachical clustering method (specified via the constructor). \n
55
* - Starting with the individaul items in the pool, clusters are merged based
56
* on the output from clustering method. \n
57
* - The merging is stopped when the number of clusters is same as
58
* the number of picks.
59
* - For each item in a cluster the sum of square of the distances to the rest
60
*of
61
* of the items (in the cluster) is computed. The item with the smallest of
62
*values is
63
* picked as a representative of the cluster. Basically trying to pick the
64
*item closest
65
* to the centroid of the cluster.
66
*
67
*
68
* \param distMat - distance matrix - a vector of double. It is assumed
69
*that only the
70
* lower triangle element of the matrix are supplied in a 1D
71
*array\n
72
* NOTE: this matrix WILL BE ALTERED during the picking\n
73
* \param poolSize - the size of the pool to pick the items from. It is
74
*assumed that the
75
* distance matrix above contains the right number of elements;
76
*i.e.
77
* poolSize*(poolSize-1) \n
78
* \param pickSize - the number items to pick from pool (<= poolSize)
79
*/
80
RDKit::INT_VECT
pick(
const
double
*distMat,
unsigned
int
poolSize,
81
unsigned
int
pickSize)
const
;
82
83
/*! \brief This is the function that does the clustering of the items - used
84
*by the picker
85
*
86
* ARGUMENTS:
87
*
88
* \param distMat - distance matrix - a vector of double. It is assumed that
89
*only the
90
* lower triangle element of the matrix are supplied in a 1D
91
*array\n
92
* NOTE: this matrix WILL BE ALTERED during the picking\n
93
* \param poolSize - the size of the pool to pick the items from. It is
94
*assumed that the
95
* distance matrix above contains the right number of elements;
96
*i.e.
97
* poolSize*(poolSize-1) \n
98
* \param pickSize - the number clusters to divide the pool into (<=
99
*poolSize)
100
*/
101
RDKit::VECT_INT_VECT
cluster(
const
double
*distMat,
unsigned
int
poolSize,
102
unsigned
int
pickSize)
const
;
103
104
private
:
105
ClusterMethod d_method;
106
};
107
};
// namespace RDPickers
108
109
#endif
RDKit::VECT_INT_VECT
std::vector< INT_VECT > VECT_INT_VECT
Definition:
types.h:268
RDKit::INT_VECT
std::vector< int > INT_VECT
Definition:
types.h:254
types.h
RDKIT_SIMDIVPICKERS_EXPORT
#define RDKIT_SIMDIVPICKERS_EXPORT
Definition:
export.h:619
RDPickers::HierarchicalClusterPicker
Diversity picker based on hierarchical clustering.
Definition:
HierarchicalClusterPicker.h:25
RDPickers::DistPicker
Abstract base class to perform item picking (typically molecules) using a distance matrix.
Definition:
DistPicker.h:46
RDPickers
Definition:
DistPicker.h:16
RDPickers::HierarchicalClusterPicker::ClusterMethod
ClusterMethod
The type of hierarchical clustering algorithm to use.
Definition:
HierarchicalClusterPicker.h:29
RDPickers::HierarchicalClusterPicker::HierarchicalClusterPicker
HierarchicalClusterPicker(ClusterMethod clusterMethod)
Constructor - takes a ClusterMethod as an argument.
Definition:
HierarchicalClusterPicker.h:43
DistPicker.h
export.h
SimDivPickers
HierarchicalClusterPicker.h
Generated on Fri Jun 12 2020 19:04:45 for RDKit by
1.8.17