$darkmode
DENOPTIM
FragmentClusterer.java
Go to the documentation of this file.
1package denoptim.fragmenter;
2
3import java.util.ArrayList;
4import java.util.Arrays;
5import java.util.HashSet;
6import java.util.List;
7import java.util.Set;
8import java.util.logging.Level;
9import java.util.logging.Logger;
10
11import javax.vecmath.Point3d;
12
13import org.apache.commons.math3.exception.DimensionMismatchException;
14import org.apache.commons.math3.ml.distance.DistanceMeasure;
15import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
16import org.biojava.nbio.structure.geometry.CalcPoint;
17import org.biojava.nbio.structure.geometry.SuperPositionSVD;
18
19import denoptim.exception.DENOPTIMException;
20import denoptim.graph.Fragment;
21import denoptim.programs.fragmenter.FragmenterParameters;
22import denoptim.utils.MathUtils;
23import denoptim.utils.Randomizer;
24
49{
53 private List<ClusterableFragment> data;
54
60 private List<DynamicCentroidCluster> clusters =
61 new ArrayList<DynamicCentroidCluster>();
62
67
71 private Logger logger;
72
73//------------------------------------------------------------------------------
74
98 public FragmentClusterer(List<ClusterableFragment> data,
100 {
101 this.data = data;
102 this.settings = settings;
103 this.logger = null;
104 }
105
106//------------------------------------------------------------------------------
107
136 public FragmentClusterer(List<ClusterableFragment> data,
138 {
139 this.data = data;
140 this.settings = settings;
141 this.logger = logger;
142 }
143
144//------------------------------------------------------------------------------
145
169 public void cluster()
170 {
171 // Start by assigning each data to its own cluster
172 for (int i=0; i<data.size(); i++)
173 {
175 clusters.add(cluster);
176 }
177
178 // Iteratively try to refine clusters
179 boolean hasChanged = true;
180 int i=0;
181 while (hasChanged && i<5)
182 {
183 hasChanged = mergeClusters();
184 i++;
185 }
186
187 // Print summary
188 if (logger!=null)
189 {
190 StringBuilder sb = new StringBuilder();
191 sb.append("Final number of clusters: ").append(clusters.size());
192 sb.append(" (#iter. "+i+")");
193 sb.append(settings.NL);
194 for (int j=0; j<clusters.size(); j++)
195 {
196 sb.append(" Size cluster "+j+": ");
197 sb.append(clusters.get(j).getPoints().size());
198 sb.append(settings.NL);
199 }
200 logger.log(Level.INFO, sb.toString());
201 }
202
203 }
204
205//------------------------------------------------------------------------------
206
207 private boolean mergeClusters()
208 {
209 boolean somethingMoved = false;
210
211 SuperPositionSVD svd = new SuperPositionSVD(false);
212
213 Set<DynamicCentroidCluster> toRemoveClusters =
214 new HashSet<DynamicCentroidCluster>();
215 for (int i=0; i<clusters.size(); i++)
216 {
217 DynamicCentroidCluster clusterI = clusters.get(i);
218
219 if (toRemoveClusters.contains(clusterI))
220 continue;
221
222 if (logger!=null)
223 {
224 logger.log(Level.FINE,"Clustering around centroid "+i+"...");
225 }
226
227 ClusterableFragment centroidI =
228 (ClusterableFragment) clusterI.getCentroid();
229
230 // Define a distance (RMSD upon superposition) for discriminating
231 // this geometry from the others.
232 SummaryStatistics refRMSDStats = getRMSDStatsOfNoisyDistorsions(
233 centroidI.getPoint(),
236 double rmsdThreshold = refRMSDStats.getMean()
238 * refRMSDStats.getStandardDeviation();
239
240 for (int j=i+1; j<clusters.size(); j++)
241 {
242 DynamicCentroidCluster clusterJ =
243 clusters.get(j);
244
245 if (toRemoveClusters.contains(clusterJ))
246 continue;
247
248 //TODO: consider re-aligning to test alternative mappings. This
249 // because the mapping is done once against the first item in
250 // the sample, but to distinguish sample members N!=1 and M!=1
251 // a different mapping (i.e., a different isomorphism) might be
252 // preferable.
253 // Essentially, this means "get rid of the assumption that one
254 // isomorphism is suitable to align all members of the sample.
255
257 clusterJ.getCentroid();
258
259 Point3d[] ptsCentroidI = ClusterableFragment.convertToPointArray(
260 centroidI.getPoint());
261 Point3d[] ptsCentroidJ = ClusterableFragment.convertToPointArray(
262 centroidJ.getPoint());
263 svd.superposeAndTransform(ptsCentroidI, ptsCentroidJ);
264 double rmsd = CalcPoint.rmsd(ptsCentroidI, ptsCentroidJ);
265 if (rmsd < rmsdThreshold)
266 {
267 somethingMoved = true;
268 toRemoveClusters.add(clusterJ);
269 if (logger!=null)
270 {
271 logger.log(Level.FINEST,"Merging cluster " + j + " into "
272 + "cluster " + i + " (RMSD "
273 + String.format("%.4f", rmsd) + "<"
274 + String.format("%.4f", rmsdThreshold) + ").");
275 }
276 for (ClusterableFragment pointJ : clusterJ.getPoints())
277 {
278 Point3d[] ptsPointJ = ClusterableFragment.convertToPointArray(
279 pointJ.getPoint());
280 svd.superposeAndTransform(ptsCentroidI, ptsPointJ);
281 pointJ.setCoordsVector(ptsPointJ);
282 clusterI.addPoint(pointJ);
283 }
284 } else {
285 // J looks like a cluster distinct from I. Try to move members
286 Set<ClusterableFragment> toRemoveFromJ =
287 new HashSet<ClusterableFragment>();
288 for (ClusterableFragment pointJ : clusterJ.getPoints())
289 {
290
291 Point3d[] ptsPointJ = ClusterableFragment.convertToPointArray(
292 pointJ.getPoint());
293 svd.superposeAndTransform(ptsCentroidI, ptsPointJ);
294 double rmsdJ = CalcPoint.rmsd(ptsCentroidJ, ptsPointJ);
295 svd.superposeAndTransform(ptsCentroidI, ptsPointJ);
296 double rmsdI = CalcPoint.rmsd(ptsCentroidI, ptsPointJ);
297 if (rmsdI < rmsdJ)
298 {
299 somethingMoved = true;
300 pointJ.setCoordsVector(ptsPointJ);
301 clusterI.addPoint(pointJ);
302 toRemoveFromJ.add(pointJ);
303 if (logger!=null)
304 {
305 logger.log(Level.FINEST,"Moving one fragment "
306 + "from cluster " + j + " to "
307 + "cluster " + i + " (RMSD "
308 + String.format("%.4f", rmsd) + ">="
309 + String.format("%.4f", rmsdThreshold) + ").");
310 }
311 }
312 }
313 clusterJ.removeAll(toRemoveFromJ);
314 if (clusterJ.getPoints().size()==0)
315 toRemoveClusters.add(clusterJ);
316 }
317 }
318 }
319
320 clusters.removeAll(toRemoveClusters);
321
322 return somethingMoved;
323 }
324
325//------------------------------------------------------------------------------
326
345 protected static SummaryStatistics getRMSDStatsOfNoisyDistorsions(
346 double[] center, int sampleSize, double maxNoise)
347 {
348 // We use always the same randomizer to get reproducible values
349 // no matter what randomizer is the caller using.
350 Randomizer rng = new Randomizer(1L);
351
352 DistanceAsRMSD measure = new DistanceAsRMSD();
353
354 SummaryStatistics stats = new SummaryStatistics();
355 List<double[]> refClusterCoords = new ArrayList<double[]>();
356 for (int k=0; k<sampleSize; k++)
357 {
358 double[] coords = Arrays.copyOf(center, center.length);
359 for (int j=0; j<coords.length; j++)
360 {
361 coords[j] = coords[j] + maxNoise*(2*rng.nextNormalDouble()-1);
362 }
363 refClusterCoords.add(coords);
364 }
365 double[] refCentroidCoords = MathUtils.centroidOf(refClusterCoords,
366 center.length);
367
368 for (double[] coords : refClusterCoords)
369 {
370 double rmsd = measure.compute(coords,refCentroidCoords);
371 stats.addValue(rmsd);
372 }
373 return stats;
374 }
375
376//------------------------------------------------------------------------------
377
382 @SuppressWarnings("serial")
383 public static class DistanceAsRMSD implements DistanceMeasure
384 {
385 @Override
386 public double compute(double[] coordsA, double[] coordsB)
387 throws DimensionMismatchException
388 {
389 Point3d[] ptsA = new Point3d[coordsA.length/3];
390 Point3d[] ptsB = new Point3d[coordsB.length/3];
391 for (int i=0; i<coordsA.length/3; i++)
392 {
393 int j = i*3;
394 ptsA[i] = new Point3d(coordsA[j],
395 coordsA[j+1],
396 coordsA[j+2]);
397 ptsB[i] = new Point3d(coordsB[j],
398 coordsB[j+1],
399 coordsB[j+2]);
400 }
401 SuperPositionSVD svd = new SuperPositionSVD(false);
402 double rmsd = svd.getRmsd(ptsA,ptsB);
403 return rmsd;
404 }
405
406 public double compute(Point3d[] ptsA, Point3d[] ptsB)
407 throws DimensionMismatchException
408 {
409 SuperPositionSVD svd = new SuperPositionSVD(false);
410 double rmsd = svd.getRmsd(ptsA,ptsB);
411 return rmsd;
412 }
413 }
414
415//------------------------------------------------------------------------------
416
422 public List<DynamicCentroidCluster> getClusters()
423 {
424 return clusters;
425 }
426
427//------------------------------------------------------------------------------
428
436 public List<List<Fragment>> getTransformedClusters()
437 {
438 List<List<Fragment>> transformedClusters = new ArrayList<List<Fragment>>();
439 for (int i=0; i<clusters.size(); i++)
440 {
441 List<Fragment> transformedCluster = new ArrayList<Fragment>();
443 for (ClusterableFragment cf : cluster.getPoints())
444 {
445 transformedCluster.add(cf.getTransformedCopy());
446 }
447 transformedClusters.add(transformedCluster);
448 }
449 return transformedClusters;
450 }
451
452//------------------------------------------------------------------------------
453
463 public List<Fragment> getClusterCentroids()
464 {
465 List<Fragment> centroids = new ArrayList<Fragment>();
466 for (int i=0; i<clusters.size(); i++)
467 {
469 ClusterableFragment centroid = cluster.getCentroid();
470 Fragment frag = centroid.getTransformedCopy();
471 centroids.add(frag);
472 }
473 return centroids;
474 }
475
476//------------------------------------------------------------------------------
477
487 public List<Fragment> getNearestToClusterCentroids()
488 {
489 List<Fragment> nearestToCentroid = new ArrayList<Fragment>();
490 for (int i=0; i<clusters.size(); i++)
491 {
493 ClusterableFragment nearest = cluster.getNearestToCentroid(
494 new DistanceAsRMSD());
495 Fragment frag = nearest.getTransformedCopy();
496 nearestToCentroid.add(frag);
497 }
498 return nearestToCentroid;
499 }
500
501//------------------------------------------------------------------------------
502
503}
Represents a fragment that can be clustered based on the 3*N coordinate of atoms and attachment point...
static Point3d[] convertToPointArray(double[] coords)
Converts an array of 3*N-dimensional coordinates into an array of 3-dimensional points assuming coord...
A cluster with a centroid that can be updated after definition of the cluster.
ClusterableFragment getCentroid()
Get the point chosen to be the centroid of this cluster.
void removeAll(Collection< ClusterableFragment > points)
Remove the given cluster member, if it is a member of this cluster.
void addPoint(ClusterableFragment point)
Add a new member of this cluster.
List< ClusterableFragment > getPoints()
Get the points contained in the cluster.
Distance in terms of RMSD between sets of 3D points expressed as a single vector of coordinates [x1,...
double compute(Point3d[] ptsA, Point3d[] ptsB)
double compute(double[] coordsA, double[] coordsB)
FragmentClusterer(List< ClusterableFragment > data, FragmenterParameters settings)
Constructor for a clusterer of fragments.
List< Fragment > getClusterCentroids()
Once the clustering is done, this method return the list of cluster centroids.
static SummaryStatistics getRMSDStatsOfNoisyDistorsions(double[] center, int sampleSize, double maxNoise)
Computes statistics for a unimodal, normally noise-distorted population of points generated by distor...
void cluster()
Runs the clustering algorithm:
FragmentClusterer(List< ClusterableFragment > data, FragmenterParameters settings, Logger logger)
Constructor for a clusterer of fragments.
FragmenterParameters settings
Settings from the user.
List< Fragment > getNearestToClusterCentroids()
Once the clustering is done, this method return the list of fragments that are nearest to the respect...
List< ClusterableFragment > data
The list of fragments to be clustered.
List< DynamicCentroidCluster > clusters
Current list of clusters.
List< DynamicCentroidCluster > getClusters()
Once the clustering is done, this method return the list of resulting clusters.
List< List< Fragment > > getTransformedClusters()
Once the clustering is done, this method return the list of clusters.
Class representing a continuously connected portion of chemical object holding attachment points.
Definition: Fragment.java:61
final String NL
New line character.
Parameters controlling execution of the fragmenter.
Some useful math operations.
Definition: MathUtils.java:39
static double[] centroidOf(Collection< double[]> points, int dimension)
Definition: MathUtils.java:457
Tool to generate random numbers and random decisions.
Definition: Randomizer.java:35
double nextNormalDouble()
Returns the next pseudo-random, normally distributed double value between 0.0 and 1....