23import java.io.IOException;
24import java.nio.file.Path;
25import java.text.DecimalFormat;
26import java.text.NumberFormat;
27import java.util.ArrayList;
28import java.util.Arrays;
29import java.util.Comparator;
30import java.util.HashMap;
31import java.util.HashSet;
32import java.util.Iterator;
34import java.util.Locale;
36import java.util.concurrent.atomic.AtomicInteger;
37import java.util.logging.Level;
38import java.util.logging.Logger;
40import org.apache.commons.io.FileUtils;
41import org.apache.commons.io.FilenameUtils;
42import org.openscience.cdk.interfaces.IAtom;
43import org.openscience.cdk.interfaces.IAtomContainer;
45import denoptim.constants.DENOPTIMConstants;
46import denoptim.exception.DENOPTIMException;
47import denoptim.fitness.FitnessParameters;
48import denoptim.fragmenter.FragmenterTools;
49import denoptim.fragmenter.ScaffoldingPolicy;
50import denoptim.fragspace.FragmentSpace;
51import denoptim.fragspace.FragmentSpaceParameters;
52import denoptim.graph.APClass;
53import denoptim.graph.AttachmentPoint;
54import denoptim.graph.Candidate;
55import denoptim.graph.DGraph;
56import denoptim.graph.Edge.BondType;
57import denoptim.graph.EmptyVertex;
58import denoptim.graph.Fragment;
59import denoptim.graph.GraphPattern;
60import denoptim.graph.Ring;
61import denoptim.graph.SymmetricAPs;
62import denoptim.graph.Template;
63import denoptim.graph.Template.ContractLevel;
64import denoptim.graph.Vertex;
65import denoptim.graph.Vertex.BBType;
66import denoptim.graph.rings.CyclicGraphHandler;
67import denoptim.graph.rings.RingClosureParameters;
68import denoptim.graph.rings.RingClosuresArchive;
69import denoptim.io.DenoptimIO;
70import denoptim.logging.CounterID;
71import denoptim.logging.Monitor;
72import denoptim.molecularmodeling.ThreeDimTreeBuilder;
73import denoptim.programs.RunTimeParameters.ParametersType;
74import denoptim.programs.denovo.GAParameters;
75import denoptim.programs.fragmenter.CuttingRule;
76import denoptim.programs.fragmenter.FragmenterParameters;
77import denoptim.utils.DummyAtomHandler;
78import denoptim.utils.GeneralUtils;
79import denoptim.utils.GraphUtils;
80import denoptim.utils.MoleculeUtils;
81import denoptim.utils.Randomizer;
82import denoptim.utils.RotationalSpaceUtils;
83import denoptim.utils.SizeControlledSet;
84import denoptim.utils.StatUtils;
95 protected static HashMap<Integer, ArrayList<Integer>>
fragmentPool;
107 DecimalFormat
df = (DecimalFormat) NumberFormat.getNumberInstance(
109 df.setGroupingUsed(
false);
123 private static final String
NL =System.getProperty(
"line.separator");
124 private static final String
FSEP = System.getProperty(
"file.separator");
137 denoptim.files.FileUtils.createDirectory(
155 HashSet<String> lstUID =
new HashSet<>(1024);
156 if (!settings.getUIDFileIn().equals(
""))
159 for (String uid : lstUID)
161 uniqueIDsSet.addNewUniqueEntry(uid);
163 settings.getLogger().log(Level.INFO,
"Read " + lstUID.size()
164 +
" known UIDs from " + settings.getUIDFileIn());
168 int numFromInitGraphs = 0;
169 String initPopFile = settings.getInitialPopulationFile();
170 if (initPopFile.length() > 0)
175 numFromInitGraphs = population.size();
176 settings.getLogger().log(Level.INFO,
"Imported " + numFromInitGraphs
177 +
" candidates (as graphs) from " + initPopFile);
214 for (
int i=0; i<multiSiteMutationProb.length; i++)
215 tot = tot + multiSiteMutationProb[i];
217 double scaledHit = hit * tot;
222 for (
int i=0; i<multiSiteMutationProb.length; i++)
224 max = max + multiSiteMutationProb[i];
225 if (min < scaledHit && scaledHit <= max)
230 min = Math.max(min,min+multiSiteMutationProb[i]);
250 double xoverWeight,
double mutWeight,
double newWeight,
254 * (xoverWeight + mutWeight + newWeight);
255 if (hit <= xoverWeight)
258 }
else if (xoverWeight < hit && hit <= (mutWeight+xoverWeight))
280 List<Candidate> eligibleParents,
Population population,
284 null, -1, -1, settings, settings.maxOffsprintFromXover());
300 List<Candidate> eligibleParents,
Population population,
304 null, -1, -1, settings);
334 List<Candidate> eligibleParents,
Population population,
335 Monitor mnt,
int[] choiceOfParents,
int choiceOfXOverSites,
341 choiceOfParents, choiceOfXOverSites, choiceOfOffstring,
379 List<Candidate> eligibleParents,
Population population,
380 Monitor mnt,
int[] choiceOfParents,
int choiceOfXOverSites,
400 boolean foundPars =
false;
401 while (numatt < settings.getMaxGeneticOpAttempts())
406 population, choiceOfParents, choiceOfXOverSites,
416 eligibleParents, 2, settings);
417 if (parents[0] ==
null || parents[1] ==
null)
424 List<Vertex> subGraphA =
new ArrayList<Vertex>();
426 gpA, settings.getRandomizer()),subGraphA);
429 List<Vertex> subGraphB =
new ArrayList<Vertex>();
431 gpB, settings.getRandomizer()),subGraphB);
442 return new ArrayList<Candidate>();
446 Vertex vA =
null, vB =
null;
447 vA = xos.
getA().get(0);
448 vB = xos.
getB().get(0);
451 DGraph gB = vB.getGraphOwner();
455 String candIdB = cB.getName();
461 DGraph gAClone = xosOnClones.
getA().get(0).getGraphOwner();
462 DGraph gBClone = xosOnClones.
getB().get(0).getGraphOwner();
470 return new ArrayList<Candidate>();
472 }
catch (Throwable t) {
473 if (!settings.xoverFailureTolerant)
476 ArrayList<DGraph> parents =
new ArrayList<DGraph>();
480 +
"_failed_xover.sdf"), parents,
true,
481 settings.getLogger(), settings.getRandomizer());
484 +
"XOverSite(C): " + xosOnClones.
toString() +
NL
485 +
" Please, report this to the authors ",t);
489 return new ArrayList<Candidate>();
495 lstIdVA = lstIdVA +
"_" + v.getVertexId();
498 lstIdVB = lstIdVB +
"_" + v.getVertexId();
499 String[] msgs =
new String[2];
502 +
"|" + gid1 +
"|" + lstIdVA
504 +
"Gen:" + cB.getGeneration() +
" Cand:" + candIdB
505 +
"|" + gid2 +
"|" + lstIdVB;
507 +
"Gen:" + cB.getGeneration() +
" Cand:" + candIdB
508 +
"|" + gid2 +
"|" + lstIdVB
511 +
"|" + gid1 +
"|" + lstIdVA;
514 graphsAffectedByXover[0] = gAClone;
515 graphsAffectedByXover[1] = gBClone;
517 List<Candidate> validOffspring =
new Population(settings);
518 for (
int ig=0; ig<graphsAffectedByXover.length; ig++)
520 DGraph g = graphsAffectedByXover[ig];
542 }
catch (NullPointerException|IllegalArgumentException e)
544 if (!settings.xoverGraphFailedEvalTolerant)
546 ArrayList<DGraph> parents =
new ArrayList<DGraph>();
549 parents.add(gAClone);
550 parents.add(gBClone);
552 +
"_failed_xover-ed_check.sdf"), parents,
true,
553 settings.getLogger(), settings.getRandomizer());
573 APClass apc = rcv.getEdgeToParent().getSrcAP().getAPClass();
590 offspring.
setUID(res[0].toString().trim());
591 offspring.
setSmiles(res[1].toString().trim());
594 validOffspring.add(offspring);
597 if (validOffspring.size() == 0)
600 return new ArrayList<Candidate>();
603 if (maxCandidatesToReturn==1)
606 if (choiceOfOffstring<0)
608 chosenOffspring = settings.getRandomizer().randomlyChooseOne(
614 chosenOffspring = validOffspring.get(choiceOfOffstring);
616 validOffspring.retainAll(Arrays.asList(chosenOffspring));
625 return validOffspring;
631 List<Candidate> eligibleParents,
Monitor mnt,
647 while (numatt < settings.getMaxGeneticOpAttempts())
667 String parentMolName = FilenameUtils.getBaseName(parent.
getSDFFile());
670 +
" Gen:" + parent.
getGeneration() +
" Cand:" + parentMolName
671 +
"|" + parentGraphId);
688 }
catch (NullPointerException|IllegalArgumentException e)
690 if (!settings.mutatedGraphFailedEvalTolerant)
692 settings.getLogger().log(Level.INFO,
"WRITING DEBUG FILE for "
695 parent.
getGraph(),
false, settings.getLogger(),
696 settings.getRandomizer());
698 graph,
false, settings.getLogger(),
699 settings.getRandomizer());
723 APClass apc = rcv.getEdgeToParent().getSrcAP().getAPClass();
741 offspring.
setUID(res[0].toString().trim());
742 offspring.
setSmiles(res[1].toString().trim());
760 ArrayList<DGraph> graphs;
764 }
catch (Exception e)
768 String msg =
"Could not read graphs from file " + srcFile
769 +
". No candidate generated!";
770 settings.getLogger().log(Level.SEVERE, msg);
773 if (graphs.size() == 0 || graphs.size() > 1)
776 String msg =
"Found " + graphs.size() +
" graphs in file " + srcFile
777 +
". I expect one and only one graph. "
778 +
"No candidate generated!";
779 settings.getLogger().log(Level.SEVERE, msg);
783 DGraph graph = graphs.get(0);
787 String msg =
"Null graph from file " + srcFile
788 +
". Expected one and only one graph. "
789 +
"No candidate generated!";
790 settings.getLogger().log(Level.SEVERE, msg);
810 candidate.
setUID(res[0].toString().trim());
811 candidate.
setSmiles(res[1].toString().trim());
818 String msg =
"Candidate " + candidate.
getName() +
" is imported from "
820 settings.getLogger().log(Level.INFO, msg);
875 if (rcv.getEdgeToParent() ==
null)
880 if (rcv.getEdgeToParent() ==
null)
885 APClass apc = rcv.getEdgeToParent().getSrcAP().getAPClass();
902 candidate.
setUID(res[0].toString().trim());
903 candidate.
setSmiles(res[1].toString().trim());
950 +
"fragmentation but no cutting rules provided. Please,"
951 +
"add FRG-CUTTINGRULESFILE=path/to/your/file to the "
970 String msg =
"Unable to convert molecule (" + mol.getAtomCount()
971 +
" atoms) to DENPTIM graph. " + de.getMessage();
972 settings.getLogger().log(Level.WARNING, msg);
1004 candidate.
setUID(res[0].toString().trim());
1005 candidate.
setSmiles(res[1].toString().trim());
1029 List<CuttingRule> cuttingRules, Logger logger,
1034 scaffoldingPolicy, 190);
1054 List<CuttingRule> cuttingRules, Logger logger,
1060 cuttingRules, logger);
1061 for (
Vertex v : fragments)
1071 if (fragments.size()==0)
1074 + mol.getAtomCount() +
" atoms produced 0 fragments.");
1079 switch (scaffoldingPolicy)
1083 for (
Vertex v : fragments)
1087 boolean setAsScaffold =
false;
1089 for (IAtom atm : iac.atoms())
1091 if (scaffoldingPolicy.label.equals(
1094 setAsScaffold =
true;
1109 case LARGEST_FRAGMENT:
1112 scaffold = fragments.stream()
1113 .max(Comparator.comparing(
1116 }
catch (Exception e)
1119 +
"among " + fragments.size() +
" fragments.", e);
1127 +
"identified as the "
1136 AtomicInteger vId =
new AtomicInteger(1);
1137 for (
int i=1; i<fragments.size(); i++)
1154 Vertex lastlyAdded =
null;
1155 for (
int i=-1; i>-4; i--)
1157 lastlyAdded = graph.getVertexList().get(
1158 graph.getVertexList().size()+i);
1159 if (!lastlyAdded.
isRCV())
1164 if (!apI.isAvailable())
1167 for (
int j=0; j<vertexes.size(); j++)
1169 Vertex fragJ = vertexes.get(j);
1171 boolean ringClosure =
false;
1172 if (graph.containsVertex(fragJ))
1181 if (apI.getCutId()==apJ.getCutId())
1190 graph.appendVertexOnAP(apI, rcvI.
getAP(0));
1196 graph.appendVertexOnAP(apJ, rcvJ.
getAP(0));
1197 graph.addRing(rcvI, rcvJ);
1201 graph.appendVertexOnAP(apI, apJ);
1227 String filename,
GAParameters settings,
boolean printpathNames)
1230 StringBuilder sb =
new StringBuilder(512);
1234 df.setMaximumFractionDigits(settings.getPrecisionLevel());
1235 df.setMinimumFractionDigits(settings.getPrecisionLevel());
1240 synchronized (population)
1242 List<Candidate> popMembers =
new ArrayList<Candidate>();
1243 for (
int i=0; i<settings.getPopulationSize(); i++)
1246 popMembers.add(mol);
1251 sb.append(String.format(
"%-20s", mname));
1253 sb.append(String.format(
"%-20s",
1255 sb.append(String.format(
"%-30s", mol.
getUID()));
1263 sb.append(System.getProperty(
"line.separator"));
1270 if (settings.savePopFile())
1272 File dest =
new File(filename.replaceAll(
"\\.txt$",
".sdf"));
1276 if (stats.trim().length() > 0)
1293 StringBuilder sb =
new StringBuilder(128);
1294 sb.append(
NL+
NL+
"#####POPULATION SUMMARY#####"+
NL);
1295 int n = popln.size();
1296 sb.append(String.format(
"%-30s",
"SIZE:"));
1297 sb.append(String.format(
"%12s", n));
1301 sb.append(String.format(
"%-30s",
"MAX:")).append(
df.format(f));
1304 sb.append(String.format(
"%-30s",
"MIN:")).append(
df.format(f));
1307 sb.append(String.format(
"%-30s",
"MEAN:")).append(
df.format(f));
1310 sb.append(String.format(
"%-30s",
"MEDIAN:")).append(
df.format(f));
1313 sb.append(String.format(
"%-30s",
"STDDEV:")).append(
df.format(f));
1318 sb.append(String.format(
"%-30s",
"SKEW:")).append(
df.format(f));
1321 sb.append(String.format(
"%-30s",
"SKEW:")).append(
" NaN (sdev too small)");
1325 res = sb.toString();
1343 List<Candidate> eligibleParents,
int number,
GAParameters settings)
1374 List<Vertex> candidates =
new ArrayList<Vertex>(
1376 candidates.removeIf(v ->
1378 || v.getBuildingBlockType() ==
BBType.
CAP);
1400 List<Candidate> eligibleParents,
Population population,
1401 int[] choiceOfParents,
int choiceOfXOverSites,
GAParameters settings)
1404 if (choiceOfParents==
null)
1407 parentA = eligibleParents.get(choiceOfParents[0]);
1409 if (parentA ==
null)
1420 parentA, eligibleParents, fragSpace);
1421 if (matesCompatibleWithFirst.size() == 0)
1425 if (choiceOfParents==
null)
1430 parentB = eligibleParents.get(choiceOfParents[1]);
1432 if (parentB ==
null)
1436 if (choiceOfXOverSites<0)
1442 choiceOfXOverSites);
1452 StringBuilder sb =
new StringBuilder(32);
1460 return sb.toString();
1468 StringBuilder sb =
new StringBuilder(32);
1480 return sb.toString();
1487 StringBuilder sb =
new StringBuilder(32);
1489 return sb.toString();
1497 StringBuilder sb =
new StringBuilder(32);
1499 .append(
FSEP).append(
"Final.txt");
1500 return sb.toString();
1519 denoptim.files.FileUtils.createDirectory(dirName);
1520 File fileDir =
new File(dirName);
1522 boolean intermediateCandidatesAreOnDisk =
1526 for (
int i=0; i<popln.size(); i++)
1533 if (intermediateCandidatesAreOnDisk && sdfile!=
null)
1535 FileUtils.copyFileToDirectory(
new File(sdfile), fileDir);
1537 File candFile =
new File(fileDir, c.
getName()
1542 }
catch (IOException ioe) {
1544 + sdfile +
"' to '" + fileDir +
"' for candidate "
1547 if (imgfile !=
null && intermediateCandidatesAreOnDisk)
1550 FileUtils.copyFileToDirectory(
new File(imgfile), fileDir);
1551 }
catch (IOException ioe) {
1553 + imgfile +
"' to '" + fileDir +
"' for candidate "
1579 new File(filename),
true);
1580 if (candidates.size() == 0)
1582 String msg =
"Found 0 candidates in file " + filename;
1583 settings.getLogger().log(Level.SEVERE, msg);
1589 if (uniqueIDsSet.addNewUniqueEntry(candidate.getUID()))
1595 candidate.setName(molName);
1596 candidate.getGraph().setGraphId(gctr);
1597 candidate.getGraph().setLocalMsg(
"INITIAL_POPULATION");
1598 String sdfPathName = genDir + System.getProperty(
"file.separator")
1600 candidate.setSDFFile(sdfPathName);
1601 candidate.setImageFile(
null);
1607 population.add(candidate);
1611 if (population.isEmpty())
1613 String msg =
"Population is still empty after having processes "
1614 + candidates.size() +
" candidates from file " + filename;
1615 settings.getLogger().log(Level.SEVERE, msg);
1624 protected static void writeUID(String outfile, HashSet<String> lstInchi,
1627 StringBuilder sb =
new StringBuilder(256);
1628 Iterator<String> iter = lstInchi.iterator();
1630 boolean first =
true;
1631 while(iter.hasNext())
1635 sb.append(iter.next());
1640 sb.append(
NL).append(iter.next());
1660 long val = Long.MIN_VALUE;
1663 DGraph g = popln1.getGraph();
1700 && !((
Template) scafVertex).getContractLevel().equals(
1704 mnt.name =
"IntraTemplateBuild";
1706 settings.getExcludedMutationTypes());
1707 for (
Vertex mutableSite : initialMutableSites)
1749 if (!(scafVertex instanceof
Template)
1799 settings.getRandomizer());
1804 String rotoSpaceFile =
"";
1811 true, settings.getLogger());
1818 boolean onlyRandomCombOfRings =
true;
1820 if (onlyRandomCombOfRings)
1824 if (combsOfRings.size() > 0)
1826 for (
Ring ring : combsOfRings)
1829 double shot = settings.getRandomizer().nextDouble();
1831 ring.getHeadVertex().getEdgeToParent().getSrcAP(),
1834 ring.getTailVertex().getEdgeToParent().getSrcAP(),
1841 if (shot < crowdProbH && shot < crowdProbT)
1843 molGraph.addRing(ring);
1850 ArrayList<List<Ring>> allCombsOfRings =
1856 ArrayList<List<Ring>> toRemove =
new ArrayList<>();
1857 for (List<Ring> setRings : allCombsOfRings)
1861 toRemove.add(setRings);
1865 allCombsOfRings.removeAll(toRemove);
1866 if (allCombsOfRings.isEmpty())
1868 String msg =
"Setup Rings: no combination of rings.";
1869 settings.getLogger().log(Level.INFO, msg);
1875 int sz = allCombsOfRings.size();
1878 List<Ring> selected =
new ArrayList<>();
1881 selected = allCombsOfRings.get(0);
1885 int selId = settings.getRandomizer().nextInt(sz);
1886 selected = allCombsOfRings.get(selId);
1890 for (
Ring ring : selected)
1892 molGraph.addRing(ring);
1908 settings.getLogger());
1909 if (molsmiles ==
null)
1911 String msg =
"Evaluation of graph: SMILES is null! "
1912 + molGraph.toString();
1913 settings.getLogger().log(Level.INFO, msg);
1914 molsmiles =
"FAIL: NO SMILES GENERATED";
1923 settings.getLogger());
1924 if (inchikey ==
null)
1926 String msg =
"Evaluation of graph: INCHI is null!";
1927 settings.getLogger().log(Level.INFO, msg);
1928 inchikey =
"UNDEFINED";
1952 if (mol.getUID().compareToIgnoreCase(molcode) == 0)
1970 int k = mols.size();
1971 double[] arr =
new double[k];
1973 for (
int i=0; i<k; i++)
1975 arr[i] = mols.get(i).getFitness();
2010 double lambda,
double sigmaOne,
double sigmaTwo)
2012 return getProbability(level, scheme, lambda, sigmaOne, sigmaTwo);
2056 int scheme,
double lambda,
double sigmaOne,
double sigmaTwo)
2059 sigmaOne, sigmaTwo);
2075 int scheme,
double lambda,
double sigmaOne,
double sigmaTwo)
2080 double f = Math.exp(-1.0 * value * lambda);
2081 prob = 1 - ((1-f)/(1+f));
2083 else if (scheme == 1)
2085 prob = 1.0 - Math.tanh(lambda * value);
2087 else if (scheme == 2)
2089 prob = 1.0-1.0/(1.0 + Math.exp(-sigmaOne * (value - sigmaTwo)));
2091 else if (scheme == 3)
2162 sigmaOne, sigmaTwo);
2191 boolean ignoreFreeRCVs)
2202 && !oap.isAvailableThroughout()
2203 && oap.getLinkedAP().getOwner()
2206 if (ignoreFreeRCVs && oap.getLinkedAP().getOwner().isRCV())
2209 crowdness = crowdness + 1;
2211 crowdness = crowdness + 1;
2234 double lambda,
double sigmaOne,
double sigmaTwo)
2243 sigmaOne, sigmaTwo);
2259 double lambda,
double sigmaOne,
double sigmaTwo)
2261 return getProbability(crowdedness, scheme, lambda, sigmaOne, sigmaTwo);
2280 for (
Vertex vtx : vertices)
2282 List<AttachmentPoint> daps = vtx.getAttachmentPoints();
2285 if (dp.isAvailable())
2287 APClass apClass = dp.getAPClass();
2288 if (classOfForbEnds.contains(apClass))
2290 String msg =
"Forbidden free AP for Vertex: "
2292 +
" MolId: " + (vtx.getBuildingBlockId() + 1)
2293 +
" Ftype: " + vtx.getBuildingBlockType()
2294 +
"\n"+ molGraph+
" \n "
2295 +
" AP class: " + apClass;
2296 fsParams.
getLogger().log(Level.WARNING, msg);
2307 protected static void readUID(String infile, HashSet<String> lstInchi)
2311 for (String str:lst)
General set of constants used in DENOPTIM.
static final int MOLDIGITS
static final String GAGENSUMMARYHEADER
Header of text files collecting generation details.
static final String GAGENDIRNAMEROOT
Prefix for generation folders.
static final String FITFILENAMEEXTOUT
Ending and extension of output file of external fitness provider.
Settings defining the calculation of fitness.
Class defining a space of building blocks.
boolean useAPclassBasedApproach()
Check usage of APClass-based approach, i.e., uses attachment points with annotated data (i....
Set< APClass > getForbiddenEndList()
Vertex makeRandomScaffold()
Randomly select a scaffold and return a fully configured clone of it.
HashMap< APClass, APClass > getCappingMap()
Parameters defining the fragment space.
FragmentSpace getFragmentSpace()
Helper methods for the genetic algorithm.
static void outputFinalResults(Population popln, GAParameters settings)
Saves the final results to disk.
static List< Candidate > buildCandidatesByXOver(List< Candidate > eligibleParents, Population population, Monitor mnt, int[] choiceOfParents, int choiceOfXOverSites, int choiceOfOffstring, GAParameters settings, int maxCandidatesToReturn)
Generates up to a pair of new offspring by performing a crossover operation.
static CandidateSource pickNewCandidateGenerationMode(double xoverWeight, double mutWeight, double newWeight, Randomizer randomizer)
Takes a decision on which CandidateSource method to use for generating a new Candidate.
static double getGrowthByLevelProbability(int level, GAParameters settings)
Calculates the probability of adding a fragment to the given level.
static double getCrowdingProbability(int crowdedness, GAParameters settings)
Calculates the probability of using an attachment point rooted on an atom that is holding other atta...
static int chooseNumberOfSitesToMutate(double[] multiSiteMutationProb, double hit)
Takes a decision on how many sites to mutate on a candidate.
static void appendVertexesToGraphFollowingEdges(DGraph graph, AtomicInteger vId, List< Vertex > vertexes)
static Candidate readCandidateFromFile(File srcFile, Monitor mnt, GAParameters settings)
static Candidate[] selectBasedOnFitness(List< Candidate > eligibleParents, int number, GAParameters settings)
Selects a number of members from the given population.
static DGraph makeGraphFromFragmentationOfMol(IAtomContainer mol, List< CuttingRule > cuttingRules, Logger logger, ScaffoldingPolicy scaffoldingPolicy, double linearAngleLimit)
Converts a molecule into a DGraph by fragmentation and re-assembling of the fragments.
static boolean setupRings(Object[] res, DGraph molGraph, GAParameters settings)
Evaluates the possibility of closing rings in a given graph and if any ring can be closed,...
static Candidate buildCandidateByXOver(List< Candidate > eligibleParents, Population population, Monitor mnt, GAParameters settings)
Generates a new offspring by performing a crossover operation.
static List< Candidate > buildCandidatesByXOver(List< Candidate > eligibleParents, Population population, Monitor mnt, GAParameters settings)
Generates a pair of new offspring by performing a crossover operation.
static void getPopulationFromFile(String filename, Population population, SizeControlledSet uniqueIDsSet, String genDir, GAParameters settings)
Reconstruct the molecular population from the file.
static void writeUID(String outfile, HashSet< String > lstInchi, boolean append)
static HashMap< Integer, ArrayList< String > > lstFragmentClass
static double getGrowthProbabilityAtLevel(int level, int scheme, double lambda, double sigmaOne, double sigmaTwo)
Calculates the probability of adding a fragment to the given level.
static DGraph makeGraphFromFragmentationOfMol(IAtomContainer mol, List< CuttingRule > cuttingRules, Logger logger, ScaffoldingPolicy scaffoldingPolicy)
Converts a molecule into a DGraph by fragmentation and re-assembling of the fragments.
static double getProbability(double value, int scheme, double lambda, double sigmaOne, double sigmaTwo)
Calculates a probability given parameters defining the shape of the probability function and a single...
static XoverSite performFBCC(List< Candidate > eligibleParents, Population population, int[] choiceOfParents, int choiceOfXOverSites, GAParameters settings)
Perform fitness-based, class-compatible selection of parents that can do crossover operations.
static String getPathNameToFinalPopulationFolder(GAParameters settings)
static CandidateSource chooseGenerationMethod(GAParameters settings)
Choose one of the methods to make new Candidates.
static Locale enUsLocale
Locale used to write reports.
static double getCrowdingProbability(AttachmentPoint ap, GAParameters settings)
Calculates the probability of using an attachment point rooted on an atom that is holding other atta...
static boolean foundForbiddenEnd(DGraph molGraph, FragmentSpaceParameters fsParams)
Check if there are forbidden ends: free attachment points that are not suitable for capping and not a...
static void setVertexCounterValue(Population population)
Set the Vertex counter value according to the largest value found in the given population.
static Candidate buildCandidateByMutation(List< Candidate > eligibleParents, Monitor mnt, GAParameters settings)
static Candidate buildCandidateByFragmentingMolecule(IAtomContainer mol, Monitor mnt, GAParameters settings, int index)
Generates a candidate by fragmenting a molecule and generating the graph that reconnects all fragment...
static DGraph buildGraph(GAParameters settings)
Graph construction starts with selecting a random core/scaffold.
static Candidate buildCandidateByXOver(List< Candidate > eligibleParents, Population population, Monitor mnt, int[] choiceOfParents, int choiceOfXOverSites, int choiceOfOffstring, GAParameters settings)
Generates a new offspring by performing a crossover operation.
static double[] getFitnesses(Population mols)
Get the fitness values for the list of molecules.
static String getSummaryStatistics(Population popln, GAParameters settings)
static void createFolderForGeneration(int genId, GAParameters settings)
Creates a folder meant to hold all the data generated during a generation.
static double getMolSizeProbability(DGraph graph, int scheme, double lambda, double sigmaOne, double sigmaTwo)
Calculates the probability of extending a graph based on the current size of the molecular representa...
static DecimalFormat initialiseFormatter()
static int getCrowdedness(AttachmentPoint ap)
Calculate the current crowdedness of the given attachment point.
static int getCrowdedness(AttachmentPoint ap, boolean ignoreFreeRCVs)
Calculate the current crowdedness of the given attachment point.
static HashMap< Integer, ArrayList< Integer > > fragmentPool
static void outputPopulationDetails(Population population, String filename, GAParameters settings, boolean printpathNames)
Write out summary for the current GA population.
static Vertex selectNonScaffoldNonCapVertex(DGraph g, Randomizer randomizer)
Randomly chooses a vertex that is neither a scaffold nor a capping group.
static double getCrowdingProbabilityForCrowdedness(int crowdedness, int scheme, double lambda, double sigmaOne, double sigmaTwo)
Calculates the crowding probability for a given level of crowdedness.
static String getPathNameToGenerationFolder(int genID, GAParameters settings)
static double getPopulationSD(Population molPopulation)
Check if fitness values have significant standard deviation.
static String getPathNameToFinalPopulationDetailsFile(GAParameters settings)
static Candidate buildCandidateFromScratch(Monitor mnt, GAParameters settings)
static double getCrowdingProbability(AttachmentPoint ap, int scheme, double lambda, double sigmaOne, double sigmaTwo)
Calculates the probability of using an attachment point rooted on an atom that is holding other atta...
static boolean containsMolecule(Population mols, String molcode)
Check if the population contains the specified InChi code.
static AttachmentPoint searchForApSuitableToRingClosure(AttachmentPoint apA, SymmetricAPs symAPsA, GAParameters settings)
static DecimalFormat df
Format for decimal fitness numbers that overwrites Locale to en_US.
static void readUID(String infile, HashSet< String > lstInchi)
static String getPathNameToGenerationDetailsFile(int genID, GAParameters settings)
static double getMolSizeProbability(DGraph graph, GAParameters settings)
Calculates the probability of extending a graph based on the current size of the molecular representa...
static Population importInitialPopulation(SizeControlledSet uniqueIDsSet, GAParameters settings)
Reads unique identifiers and initial population file according to the GAParameters.
Collection of operators meant to alter graphs and associated utilities.
static boolean extendGraph(Vertex curVertex, boolean extend, boolean symmetryOnAps, GAParameters settings)
Function that keeps extending the graph according to the growth/substitution probability.
static boolean performMutation(DGraph graph, Monitor mnt, GAParameters settings)
Tries to mutate the given graph.
static boolean performCrossover(XoverSite site, FragmentSpace fragSpace)
Performs the crossover that swaps the two subgraphs defining the given XoverSite.
A collection of candidates.
List< XoverSite > getXoverSites(Candidate parentA, Candidate parentB)
Returns a list of crossover sites between the two given parents.
List< Candidate > getXoverPartners(Candidate memberA, List< Candidate > eligibleParents, FragmentSpace fragSpace)
Returns a list of population members that can do crossover with the specified member.
Class that offers methods to performs fitness-driven selection of candidates.
static Candidate[] performRandomSelection(List< Candidate > population, int sz, RunTimeParameters settings)
Randomly select k individuals from the population.
static Candidate[] performTournamentSelection(List< Candidate > eligibleParents, int sz, RunTimeParameters settings)
Select p individuals at random.
static Candidate[] performRWS(List< Candidate > population, int sz, RunTimeParameters settings)
Roulette wheel selection is implemented as follows:
static Candidate[] performSUS(List< Candidate > population, int sz, RunTimeParameters settings)
Stochastic Uniform Sampling Note: this implementation is based on the WATCHMAKER framework http://wat...
This class collects the data identifying the subgraphs that would be swapped by a crossover event.
XoverSite projectToClonedGraphs()
Creates a new instance of this class that contains the list of vertexes that correspond to those cont...
String toString()
Produces a string showing what this object is.
List< Vertex > getA()
Returns the collection of vertexes belonging to the first subgraph.
List< Vertex > getB()
Returns the collection of vertexes belonging to the second subgraph.
static final APClass RCACLASSMINUS
Conventional class of attachment points on ring-closing vertexes.
static final String ATPLUS
String defining a conventional APClass.
static APClass make(String ruleAndSubclass)
Creates an APClass if it does not exist already, or returns the reference to the existing instance.
An attachment point (AP) is a possibility to attach a Vertex onto the vertex holding the AP (i....
int getAtomPositionNumber()
The index of the source atom in the atom list of the fragment.
A candidate is the combination of a denoptim graph with molecular representation and may include also...
void setSDFFile(String molFile)
void setSmiles(String smiles)
int getGeneration()
The generation this candidate belongs to is that in which it was generated.
void setName(String name)
void setChemicalRepresentation(IAtomContainer iac)
Just place the argument in the IAtomContainer field of this object.
Container for the list of vertices and the edges that connect them.
void setCandidateClosableChains(ArrayList< ClosableChain > closableChains)
void addVertex(Vertex vertex)
Appends a vertex to this graph without creating any edge.
DGraph embedPatternsInTemplates(GraphPattern pattern, FragmentSpace fragSpace)
Searches for the given pattern type and generates a new graph where each set of (clones of) vertexes ...
void getChildrenTree(Vertex vertex, List< Vertex > children)
Gets all the children of the current vertex recursively.
ArrayList< Vertex > getFreeRCVertices()
Search for unused ring closing vertices: vertices that contain only a RingClosingAttractor and are no...
Object[] checkConsistency(RunTimeParameters settings)
Peeks into this graph to derive a preliminary chemical representation with SMILES and InChIKey.
List< Vertex > getVertexList()
DGraph clone()
Returns almost "deep-copy" of this graph.
void renumberGraphVertices()
Reassign vertex IDs to all vertices of this graph.
boolean containsOrEmbedsVertex(Vertex v)
Check if the specified vertex is contained in this graph as a node or in any inner graphs that may be...
DGraph getOutermostGraphOwner()
void addCappingGroups(FragmentSpace fragSpace)
Adds capping groups on free unused attachment points.
void cleanup()
Wipes the data in this graph.
Candidate getCandidateOwner()
Returns the reference of the candidate item that is defined by this graph.
ArrayList< Vertex > getUsedRCVertices()
Search for used ring closing vertices: vertices that contain only a RingClosingAttractor and are part...
int getHeavyAtomsCount()
Calculate the number of atoms from the graph representation.
List< Vertex > getMutableSites()
A list of mutation sites from within this graph.
void setLocalMsg(String msg)
boolean detectSymVertexSets()
Tries to determine the set of symmetric vertices in this graph based on finding compatible Vertexes t...
An empty vertex has the behaviors of a vertex, but has no molecular structure.
Class representing a continuously connected portion of chemical object holding attachment points.
void updateAPs()
Changes the properties of each APs as to reflect the current atom list.
This class represents the closure of a ring in a spanning tree.
A collection of AttachmentPoints that are related by a relation that we call "symmetry",...
A vertex is a data structure that has an identity and holds a list of AttachmentPoints.
int getBuildingBlockId()
Returns the index of the building block that should correspond to the position of the building block ...
void setVertexId(long vertexId2)
DGraph getGraphOwner()
Returns the graph this vertex belongs to or null.
abstract List< AttachmentPoint > getAttachmentPoints()
abstract int getHeavyAtomsCount()
void setBuildingBlockType(Vertex.BBType buildingBlockType)
abstract IAtomContainer getIAtomContainer()
AttachmentPoint getAP(int i)
Get attachment point i on this vertex.
This is a tool to identify and manage vertices' connections not included in the DGraph,...
List< Ring > getRandomCombinationOfRings(IAtomContainer inMol, DGraph molGraph, int maxRingClosures)
Identifies a random combination of ring closing paths and returns it as list of DENOPTIMRings ready t...
boolean checkChelatesGraph(DGraph molGraph, List< Ring > ringsSet)
Evaluates the combination of a DENOPTIMGraph and a set of DENOPTIMRings and decides whether it's a pr...
ArrayList< List< Ring > > getPossibleCombinationOfRings(IAtomContainer mol, DGraph molGraph)
Identifies all possible ring closing paths and returns them as list of DENOPTIMRings ready to be appe...
Parameters and settings related to handling ring closures.
RingClosuresArchive getRingClosuresArchive()
boolean buildChelatesMode
Flag activating procedures favoring formation of chelates.
void allowRingClosures(boolean value)
Data structure to store and handle information about sub-structures (i.e., chains of fragments) and r...
ArrayList< ClosableChain > getCCFromTurningPointId(int tpId)
Returns the library of closable chains having the given turning point (i.e., the fragments involved i...
Utility methods for input/output.
static ArrayList< Candidate > readCandidates(File file)
Reads SDF files that represent one or more tested candidates.
static void writeGraphsToSDF(File file, List< DGraph > graphs, Logger logger, Randomizer randomizer)
Writes the graphs to SDF file.
static void writeCandidateToFile(File file, Candidate candidate, boolean append)
Writes one candidate item to file.
static void writeGraphToSDF(File file, DGraph graph, boolean append, boolean make3D, Logger logger, Randomizer randomizer)
Writes the graph to SDF file.
static ArrayList< DGraph > readDENOPTIMGraphsFromFile(File inFile)
Reads a list of DGraphs from file.
static ArrayList< String > readList(String fileName)
Read list of data as text.
static void writeData(String fileName, String data, boolean append)
Write text-like data file.
static void writeCandidatesToFile(File file, List< Candidate > popMembers, boolean append)
Writes candidate items to file.
A collection of counters used to count actions taken by the evolutionary algorithm.
void increase(CounterID cid)
Tool to build three-dimensional (3D) tree-like molecular structures from DGraph.
void setAlignBBsIn3D(boolean align)
Sets the flag that controls whether building blocks have to be aligned according to the AP vectors or...
IAtomContainer convertGraphTo3DAtomContainer(DGraph graph)
Creates a three-dimensional molecular representation from a given DGraph.
boolean containsParameters(ParametersType type)
RunTimeParameters getParameters(ParametersType type)
Logger getLogger()
Get the name of the program specific logger.
Randomizer getRandomizer()
Returns the current program-specific randomizer.
Parameters for genetic algorithm.
double getConstructionWeight()
double getMutationWeight()
boolean useMolSizeBasedProb
Flag recording the intention to use molecular size-controlled graph extension probability.
int getMolGrowthProbabilityScheme()
int getCrowdingProbabilityScheme()
String getDataDirectory()
double getGrowthFactorMiddleSigma()
double getMolGrowthFactorMiddleSigma()
boolean useLevelBasedProb
Flag recording the intention to use level-controlled graph extension probability.
int getNumberOfGenerations()
double getMolGrowthFactorSteepSigma()
double getGrowthFactorSteepSigma()
double getMolGrowthMultiplier()
double getCrowdingFactorSteepSigma()
double getCrowdingFactorMiddleSigma()
int getGrowthProbabilityScheme()
double getGrowthMultiplier()
int getSelectionStrategyType()
double getCrowdingMultiplier()
double getCrossoverWeight()
Parameters controlling execution of the fragmenter.
double getLinearAngleLimit()
ScaffoldingPolicy getScaffoldingPolicy()
List< CuttingRule > getCuttingRules()
ContractLevel getEmbeddedRingsContract()
boolean embedRingsInTemplate
Flag that enables the embedding of rings in templates upon conversion of molecules into DGraph.
Tool to add/remove dummy atoms from linearities or multi-hapto sites.
static void addDummiesOnLinearities(Fragment frag, double angLim)
Append dummy atoms on otherwise linear arrangements of atoms.
static String getPaddedString(int count, int number)
Returns the padded string with zeroes placed to the left of 'number' up to reach the desired number o...
static synchronized void ensureVertexIDConsistency(long l)
Method used to ensure consistency between internal atomic integer and vertex id from imported graphs.
static synchronized int getUniqueMoleculeIndex()
Unique counter for the number of molecules generated.
static synchronized int getUniqueGraphIndex()
Unique counter for the number of graphs generated.
Utilities for molecule conversion.
static String getInChIKeyForMolecule(IAtomContainer mol, Logger logger)
Generates the InChI key for the given atom container.
static String getSMILESForMolecule(IAtomContainer mol, Logger logger)
Returns the SMILES representation of the molecule.
static String getSymbolOrLabel(IAtom atm)
Gets either the elemental symbol (for standard atoms) or the label (for pseudo-atoms).
Tool to generate random numbers and random decisions.
public< T > T randomlyChooseOne(Collection< T > c)
Chooses one member among the given collection.
double nextDouble()
Returns the next pseudo-random, uniformly distributed double value between 0.0 and 1....
Tool box for definition and management of the rotational space, which is given by the list of rotatab...
static ArrayList< ObjectPair > defineRotatableBonds(IAtomContainer mol, String defRotBndsFile, boolean addIterfragBonds, boolean excludeRings, Logger logger)
Define the rotational space (also torsional space) for a given molecule.
Class meant to collect unique strings without leading to memory overflow.
Utilities for calculating basic statistics.
static double mean(double[] numbers)
Returns the mean number in the numbers list.
static double stddev(double[] numbers, boolean biasCorrected)
Returns the standard deviation of the numbers.
static double skewness(double[] m, boolean biasCorrected)
Computes the skewness of the available values.
static double median(double[] m)
Calculates median value of a sorted list.
static double min(double[] numbers)
Returns the minimum value among the numbers.
static double max(double[] numbers)
Returns the maximum value among the numbers.
Defines how to define the scaffold vertex of a graph.
A chosen method for generation of new Candidates.
Possible chemical bond types an edge can represent.
Enum specifying to what extent the template's inner graph can be changed.
FIXED
Inner graphs are effectively equivalent to the Fragment class, as no change in the inner structure is...
The type of building block.
FAILEDBUILDATTEMPTS_SETUPRINGS
FAILEDMANUALADDATTEMPTS_EVAL
FAILEDCONVERTBYFRAGATTEMPTS
FAILEDMUTATTEMTS_FORBENDS
FAILEDXOVERATTEMPTS_SETUPRINGS
FAILEDXOVERATTEMPTS_FORBENDS
FAILEDBUILDATTEMPTS_FORBIDENDS
FAILEDXOVERATTEMPTS_FINDPARENTS
FAILEDCONVERTBYFRAGATTEMPTS_TMPLEMBEDDING
FAILEDXOVERATTEMPTS_PERFORM
FAILEDCONVERTBYFRAGATTEMPTS_EVAL
FAILEDCONVERTBYFRAGATTEMPTS_FRAGMENTATION
FAILEDBUILDATTEMPTS_GRAPHBUILD
FAILEDMUTATTEMTS_SETUPRINGS
Identifier of the type of parameters.
FS_PARAMS
Parameters pertaining to the definition of the fragment space.
FRG_PARAMS
Parameters controlling the fragmenter.
FIT_PARAMS
Parameters pertaining to the calculation of fitness (i.e., the fitness provider).
RC_PARAMS
Parameters pertaining to ring closures in graphs.