1package denoptim.fragmenter;
3import java.util.ArrayList;
4import java.util.Arrays;
5import java.util.HashSet;
8import java.util.logging.Level;
9import java.util.logging.Logger;
11import javax.vecmath.Point3d;
13import org.apache.commons.math3.exception.DimensionMismatchException;
14import org.apache.commons.math3.ml.distance.DistanceMeasure;
15import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
16import org.biojava.nbio.structure.geometry.CalcPoint;
17import org.biojava.nbio.structure.geometry.SuperPositionSVD;
19import denoptim.exception.DENOPTIMException;
20import denoptim.graph.Fragment;
21import denoptim.programs.fragmenter.FragmenterParameters;
22import denoptim.utils.MathUtils;
23import denoptim.utils.Randomizer;
53 private List<ClusterableFragment>
data;
60 private List<DynamicCentroidCluster>
clusters =
61 new ArrayList<DynamicCentroidCluster>();
172 for (
int i=0; i<
data.size(); i++)
179 boolean hasChanged =
true;
181 while (hasChanged && i<5)
190 StringBuilder sb =
new StringBuilder();
191 sb.append(
"Final number of clusters: ").append(
clusters.size());
192 sb.append(
" (#iter. "+i+
")");
194 for (
int j=0; j<
clusters.size(); j++)
196 sb.append(
" Size cluster "+j+
": ");
197 sb.append(
clusters.get(j).getPoints().size());
200 logger.log(Level.INFO, sb.toString());
209 boolean somethingMoved =
false;
211 SuperPositionSVD svd =
new SuperPositionSVD(
false);
213 Set<DynamicCentroidCluster> toRemoveClusters =
214 new HashSet<DynamicCentroidCluster>();
215 for (
int i=0; i<
clusters.size(); i++)
219 if (toRemoveClusters.contains(clusterI))
224 logger.log(Level.FINE,
"Clustering around centroid "+i+
"...");
236 double rmsdThreshold = refRMSDStats.getMean()
238 * refRMSDStats.getStandardDeviation();
240 for (
int j=i+1; j<
clusters.size(); j++)
245 if (toRemoveClusters.contains(clusterJ))
263 svd.superposeAndTransform(ptsCentroidI, ptsCentroidJ);
264 double rmsd = CalcPoint.rmsd(ptsCentroidI, ptsCentroidJ);
265 if (rmsd < rmsdThreshold)
267 somethingMoved =
true;
268 toRemoveClusters.add(clusterJ);
271 logger.log(Level.FINEST,
"Merging cluster " + j +
" into "
272 +
"cluster " + i +
" (RMSD "
273 + String.format(
"%.4f", rmsd) +
"<"
274 + String.format(
"%.4f", rmsdThreshold) +
").");
280 svd.superposeAndTransform(ptsCentroidI, ptsPointJ);
281 pointJ.setCoordsVector(ptsPointJ);
286 Set<ClusterableFragment> toRemoveFromJ =
287 new HashSet<ClusterableFragment>();
293 svd.superposeAndTransform(ptsCentroidI, ptsPointJ);
294 double rmsdJ = CalcPoint.rmsd(ptsCentroidJ, ptsPointJ);
295 svd.superposeAndTransform(ptsCentroidI, ptsPointJ);
296 double rmsdI = CalcPoint.rmsd(ptsCentroidI, ptsPointJ);
299 somethingMoved =
true;
300 pointJ.setCoordsVector(ptsPointJ);
302 toRemoveFromJ.add(pointJ);
305 logger.log(Level.FINEST,
"Moving one fragment "
306 +
"from cluster " + j +
" to "
307 +
"cluster " + i +
" (RMSD "
308 + String.format(
"%.4f", rmsd) +
">="
309 + String.format(
"%.4f", rmsdThreshold) +
").");
315 toRemoveClusters.add(clusterJ);
320 clusters.removeAll(toRemoveClusters);
322 return somethingMoved;
346 double[] center,
int sampleSize,
double maxNoise)
354 SummaryStatistics stats =
new SummaryStatistics();
355 List<double[]> refClusterCoords =
new ArrayList<double[]>();
356 for (
int k=0; k<sampleSize; k++)
358 double[] coords = Arrays.copyOf(center, center.length);
359 for (
int j=0; j<coords.length; j++)
363 refClusterCoords.add(coords);
368 for (
double[] coords : refClusterCoords)
370 double rmsd = measure.
compute(coords,refCentroidCoords);
371 stats.addValue(rmsd);
382 @SuppressWarnings(
"serial")
386 public double compute(
double[] coordsA,
double[] coordsB)
387 throws DimensionMismatchException
389 Point3d[] ptsA =
new Point3d[coordsA.length/3];
390 Point3d[] ptsB =
new Point3d[coordsB.length/3];
391 for (
int i=0; i<coordsA.length/3; i++)
394 ptsA[i] =
new Point3d(coordsA[j],
397 ptsB[i] =
new Point3d(coordsB[j],
401 SuperPositionSVD svd =
new SuperPositionSVD(
false);
402 double rmsd = svd.getRmsd(ptsA,ptsB);
406 public double compute(Point3d[] ptsA, Point3d[] ptsB)
407 throws DimensionMismatchException
409 SuperPositionSVD svd =
new SuperPositionSVD(
false);
410 double rmsd = svd.getRmsd(ptsA,ptsB);
438 List<List<Fragment>> transformedClusters =
new ArrayList<List<Fragment>>();
439 for (
int i=0; i<
clusters.size(); i++)
441 List<Fragment> transformedCluster =
new ArrayList<Fragment>();
445 transformedCluster.add(cf.getTransformedCopy());
447 transformedClusters.add(transformedCluster);
449 return transformedClusters;
465 List<Fragment> centroids =
new ArrayList<Fragment>();
466 for (
int i=0; i<
clusters.size(); i++)
489 List<Fragment> nearestToCentroid =
new ArrayList<Fragment>();
490 for (
int i=0; i<
clusters.size(); i++)
496 nearestToCentroid.add(frag);
498 return nearestToCentroid;
Represents a fragment that can be clustered based on the 3*N coordinate of atoms and attachment point...
Fragment getTransformedCopy()
static Point3d[] convertToPointArray(double[] coords)
Converts an array of 3*N-dimensional coordinates into an array of 3-dimensional points assuming coord...
A cluster with a centroid that can be updated after definition of the cluster.
ClusterableFragment getCentroid()
Get the point chosen to be the centroid of this cluster.
void removeAll(Collection< ClusterableFragment > points)
Remove the given cluster members, if they are members of this cluster.
void addPoint(ClusterableFragment point)
Add a new member of this cluster.
List< ClusterableFragment > getPoints()
Get the points contained in the cluster.
Distance in terms of RMSD between sets of 3D points expressed as a single vector of coordinates [x1,...
double compute(Point3d[] ptsA, Point3d[] ptsB)
double compute(double[] coordsA, double[] coordsB)
FragmentClusterer(List< ClusterableFragment > data, FragmenterParameters settings)
Constructor for a clusterer of fragments.
List< Fragment > getClusterCentroids()
Once the clustering is done, this method returns the list of cluster centroids.
static SummaryStatistics getRMSDStatsOfNoisyDistorsions(double[] center, int sampleSize, double maxNoise)
Computes statistics for a unimodal, normally noise-distorted population of points generated by distor...
void cluster()
Runs the clustering algorithm:
FragmentClusterer(List< ClusterableFragment > data, FragmenterParameters settings, Logger logger)
Constructor for a clusterer of fragments.
FragmenterParameters settings
Settings from the user.
List< Fragment > getNearestToClusterCentroids()
Once the clustering is done, this method returns the list of fragments that are nearest to the respect...
List< ClusterableFragment > data
The list of fragments to be clustered.
List< DynamicCentroidCluster > clusters
Current list of clusters.
List< DynamicCentroidCluster > getClusters()
Once the clustering is done, this method returns the list of resulting clusters.
List< List< Fragment > > getTransformedClusters()
Once the clustering is done, this method returns the list of clusters.
Class representing a continuously connected portion of chemical object holding attachment points.
final String NL
New line character.
Parameters controlling execution of the fragmenter.
double getFactorForSDOnStatsOfUnimodalPop()
double getMaxNoiseUnimodalPop()
Some useful math operations.
static double[] centroidOf(Collection< double[]> points, int dimension)
Tool to generate random numbers and random decisions.
double nextNormalDouble()
Returns the next pseudo-random, normally distributed double value between 0.0 and 1....