19package denoptim.programs.fragmenter;
22import java.lang.reflect.Field;
23import java.text.SimpleDateFormat;
24import java.util.ArrayList;
26import java.util.HashMap;
27import java.util.HashSet;
28import java.util.LinkedHashMap;
32import java.util.concurrent.atomic.AtomicInteger;
33import java.util.logging.Level;
35import denoptim.constants.DENOPTIMConstants;
36import denoptim.exception.DENOPTIMException;
37import denoptim.files.FileFormat;
38import denoptim.files.FileUtils;
39import denoptim.fragmenter.FragmentClusterer;
40import denoptim.fragmenter.ScaffoldingPolicy;
41import denoptim.graph.DGraph;
42import denoptim.graph.Template.ContractLevel;
43import denoptim.graph.Vertex;
44import denoptim.graph.Vertex.BBType;
45import denoptim.io.DenoptimIO;
46import denoptim.logging.StaticLogger;
47import denoptim.programs.RunTimeParameters;
48import denoptim.utils.FormulaUtils;
75 private LinkedHashMap<String, String>
formulae;
154 new HashMap<String,Double>();
162 new HashSet<Map<String,Double>>();
499 this.cutRulesFile = pathname;
710 Map<String, Double> formulaMax)
712 this.formulaCriteriaLessThan = formulaMax;
1024 this.scaffoldingPolicy = sp;
1061 switch (key.toUpperCase())
1067 case "STRUCTURESFILE=":
1071 case "FORMULATXTFILE=":
1076 case "PREFILTERSMARTS=":
1081 case "CUTTINGRULESFILE=":
1086 case "ADDEXPLICITHYDROGEN":
1090 case "UNSETTOSINGLEBO":
1094 case "IGNORABLEFRAGMENTS=":
1099 case "TARGETFRAGMENTS=":
1104 case "ISOMORPHICSAMPLESIZE=":
1107 }
catch (Throwable t)
1109 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1116 case "REMOVEDUPLICATES":
1123 }
catch (Throwable t)
1125 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1130 case "REJECTMINORISOTOPES":
1135 case "REJECTELEMENT=":
1140 case "REJFORMULALESSTHAN=":
1143 msg =
"Attempt to specify more than one criterion for "
1144 +
"rejecting fragments based on a lower-limit "
1145 +
"molecular formula. ";
1148 Map<String,Double> elSymbolsCount =
null;
1151 }
catch (Throwable t)
1153 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1160 case "REJFORMULAMORETHAN=":
1161 Map<String,Double> elSymbolsCount2=
null;
1164 }
catch (Throwable t)
1166 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1173 case "REJECTAPCLASS=":
1178 case "REJECTAPCLASSCOMBINATION=":
1179 String[] lst = value.split(
"\\s+");
1184 case "MAXFRAGSIZE=":
1187 }
catch (Throwable t)
1189 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1195 case "MINFRAGSIZE=":
1198 }
catch (Throwable t)
1200 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1206 case "REJECTSMARTS=":
1211 case "RETAINSMARTS=":
1216 case "CLUSTERIZEANDCOLLECT=":
1219 switch (value.trim().toUpperCase())
1231 + key +
": '" + value +
"'");
1235 case "SAVECLUSTERS":
1240 case "SIZEUNIMODALPOPULATION=":
1244 case "MAXNOISEUNIMODALPOPULATION=":
1248 case "SDWEIGHTUNIMODALPOPULATION=":
1252 case "SCAFFOLDINGPOLICY=":
1253 String[] words = value.split(
"\\s+");
1256 words[0].toUpperCase());
1266 scaffoldingPolicy.label = words[1];
1268 }
catch (Throwable t)
1270 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1275 case "EMBEDRINGSINTEMPLATES=":
1281 case "RINGEMBEDDINGCONTRACT=":
1283 if (value.length() > 0)
1290 case "FRAGMENTATIONTEMPLATE=":
1304 case "PARALLELTASKS=":
1311 msg =
"Unable to understand value " + key +
"'" + value +
"'";
1323 msg =
"Unable to understand value " + key +
"'" + value +
"'";
1329 msg =
"Keyword " + key +
" is not a known Fragmenter-"
1330 +
"related keyword. Check input files.";
1344 if (!
workDir.equals(System.getProperty(
"user.dir")))
1350 "ISOMORPHICSAMPLESIZE");
1391 }
catch (Throwable e)
1404 }
catch (Throwable e)
1417 }
catch (Throwable e)
1433 +
"Output files associated with the current run are "
1443 String fileSep = System.getProperty(
"file.separator");
1444 boolean success =
false;
1447 SimpleDateFormat sdf =
new SimpleDateFormat(
"yyyyMMddkkmmss");
1448 String str =
"FRG" + sdf.format(
new Date());
1449 workDir = curDir + fileSep + str;
1466 StringBuilder sb =
new StringBuilder(1024);
1468 for (Field f : this.getClass().getDeclaredFields())
1472 sb.append(f.getName()).append(
" = ").append(
1473 f.get(
this)).append(
NL);
1478 +
" parameters. Cause: " + t);
1484 sb.append(otherCollector.getPrintedList());
1486 return sb.toString();
1626 this.useCentroidsAsRepresentativeConformer =
General set of constants used in DENOPTIM.
static final String MWSLOTFRAGSUNQFILENANEEND
Final part of filename used to collect unique fragments in a certain molecular weight slot.
static final String EOL
new line character
static final FileFormat TMPFRAGFILEFORMAT
Format for intermediate files used during fragmentation.
static final String MWSLOTFRAGSFILENAMEROOT
Initial part of filename used to collect fragments belonging to a certain molecular weight slot.
static final String MWSLOTFRAGSALLFILENANEEND
Final part of filename used to collect all sampled fragments in a certain molecular weight slot inclu...
static boolean createDirectory(String fileName)
Creates a directory.
static void addToRecentFiles(String fileName, FileFormat ff)
Appends an entry to the list of recent files.
Utility methods for input/output.
static LinkedHashMap< String, String > readCSDFormulae(File file)
Read molecular formula from TXT data representation produced by Cambridge Structural Database tools (...
static void readCuttingRules(BufferedReader reader, List< CuttingRule > cutRules, String source)
Read cutting rules from a stream.
static ArrayList< DGraph > readDENOPTIMGraphsFromFile(File inFile)
Reads a list of DGraphs from file.
static ArrayList< Vertex > readVertexes(File file, Vertex.BBType bbt)
Reads Vertexes from any file that can contain such items.
static final String FS
File separator from system.
Logger class for DENOPTIM.
static final Logger appLogger
Collection of parameters controlling the behavior of the software.
Map< ParametersType, RunTimeParameters > otherParameters
Collection of other parameters by type.
boolean isMaster
Flag signaling this is the master collection of parameters.
String getWorkDirectory()
Gets the pathname to the working directory.
static boolean readYesNoTrueFalse(String s)
Reads a string searching for any common way to say either yes/true (including shorthand t/y) or no/fa...
void ensureFileExists(String pathname)
Ensures a pathname does lead to an existing file or triggers an error.
String paramTypeName()
Returns a string defining the type of the parameters collected here.
void ensureIsPositive(String paramName, int value, String paramKey)
Ensures that a parameter is a positive number (x>=0) or triggers an error.
void checkOtherParameters()
Checks any of the parameter collections contained in this instance.
String workDir
Working directory.
final String NL
New line character.
void processOtherParameters()
Processes any of the parameter collections contained in this instance.
int verbosity
Verbosity level for logger.
void ensureFileExistsIfSet(String pathname)
Ensures a pathname is not empty nor null and that it does lead to an existing file or triggers an err...
Parameters controlling execution of the fragmenter.
String structuresFile
Pathname to the file containing the structures of the molecules to fragment.
void setEmbeddedRingsContract(ContractLevel embeddedRingsContract)
double getLinearAngleLimit()
void setFragRejectionSMARTS(Map< String, String > fragRejectionSMARTS)
String getCuttingRulesFilePathname()
void setRejectedAPClassCombinations(Set< String[]> rejectedAPClassCombinations)
void setSaveClustersOfConformerToFile(boolean saveClustersOfConformerToFile)
Sets the flag requesting to print clusters of fragments to file upon extraction of the most represent...
void setCuttingRules(List< CuttingRule > cuttingRules)
Assigns the cutting rules loaded from the input.
boolean doFragmentation()
void setStructuresFile(String structuresFile)
Sets the pathname of the file containing input structures.
ArrayList< Vertex > getIgnorableFragments()
void setSizeUnimodalPop(int sizeUnimodalPop)
Sets the size of the population of normally distributed noise-distorted population used to define the...
void setFragmentationTmpls(List< DGraph > fragmentationTmpls)
Sets the list of graph templates for fragmentation.
double factorForSDOnStatsOfUnimodalPop
Factor used to multiply the standard deviation when adding it to the mean of the RMSD for a unimodal ...
void setCheckFormula(boolean checkFormula)
Sets the value of the flag controlling the execution of elemental analysis on the structures.
Set< String > preFilterSMARTS
SMARTS identifying substructures that lead to rejection of a structure before fragmentation.
void setMinFragHeavyAtomCount(int minFragHeavyAtomCount)
Map< String, File > getMWSlotToAllFragsFile()
Map< String, File > mwSlotToUnqFragsFile
Mapping of the molecular weight slot identifier to the file collecting unique fragments belonging to ...
void setFactorForSDOnStatsOfUnimodalPop(double factorForSDOnStatsOfUnimodalPop)
Sets the weight of the standard deviation when calculating the RMSD threshold from the statistics of ...
String cutRulesFile
Pathname to the file containing the cutting rules.
String formulaeFile
Pathname to the file containing the formulae of the molecules to fragment.
ContractLevel embeddedRingsContract
Type of constraint defined for any template generated upon conversion of molecules into DGraph.
void setRejectedFormulaMoreThan(Set< Map< String, Double > > formulaCriteriaMoreThan)
boolean doRejectWeirdIsotopes
Flag requesting to reject fragments with minor isotopes.
boolean isSaveClustersOfConformerToFile()
boolean doManageIsomorphicFamilies
boolean doExtactRepresentativeConformer()
void checkParameters()
Evaluate consistency of input parameters.
void interpretKeyword(String key, String value)
Processes a keyword/value pair and assign the related parameters.
void setFragRetentionSMARTS(Map< String, String > fragRetentionSMARTS)
final Object MANAGEMWSLOTSSLOCK
Synchronization lock for manipulating a) the collections (i.e., MW slots) of fragments produced by mu...
double getFactorForSDOnStatsOfUnimodalPop()
List< DGraph > getFragmentationTmpls()
Map< String, Double > getRejectedFormulaLessThan()
boolean doAddDuOnLinearity
Flag requesting to add dummy atoms on linearities.
double linearAngleLimit
Upper limit for an angle before it is treated as "flat" ("linear") angle, i.e., close enough to 180 D...
int getMinFragHeavyAtomCount()
Map< String, File > getMWSlotToUnqFragsFile()
int minFragHeavyAtomCount
Lower limit for number of non-H atoms in fragments.
void setAddExplicitH(boolean addExplicitH)
Give true to add explicit H atoms on all atoms.
boolean doFiltering
Flag requesting to do post-fragmentation processing of fragments, i.e., application of all filtration...
void setUseCentroidsAsRepresentativeConformer(boolean useCentroidsAsRepresentativeConformer)
void setRejectedFormulaLessThan(Map< String, Double > formulaMax)
Set< Map< String, Double > > formulaCriteriaMoreThan
Upper limits of formula-based criteria for fragment rejection.
String fragmentationTmplFile
Pathname to file containing a graph to be used as template for fragmentation.
boolean doFragmentation
Flag requesting the fragmentation of the structures.
ScaffoldingPolicy getScaffoldingPolicy()
void setNumTasks(int numParallelTasks)
Sets the number of parallel tasks to run.
Set< String > getRejectedElements()
Set< String > getPreFiltrationSMARTS()
AtomicInteger unqIsomorphicFamilyId
Unique identifier of a family of isomorphic versions of a fragment.
void setLinearAngleLimit(double linearAngleLimit)
Sets the upper limit for an angle before it is treated as "flat" angle, i.e., close enough to 180 DEG...
boolean doAddDuOnLinearity()
List< CuttingRule > cuttingRules
List of cutting rules sorted by priority.
String getPrintedList()
Returns the list of parameters in a string with newline characters as delimiters.
File getMWSlotFileNameAllFrags(String mwSlotId)
Builds the pathname of the file meant to hold all isomorphic fragments from a given MW slot.
double getMaxNoiseUnimodalPop()
int getIsomorphicSampleSize()
Set< String > rejectedAPClasses
The initial part of APClasses that lead to rejection of a fragment.
Map< String, String > getFragRetentionSMARTS()
int getMaxFragHeavyAtomCount()
boolean doExtactRepresentativeConformer
Flag signaling the request to analyze each isomorphic family to extract the most representative fragm...
ArrayList< Vertex > targetFragments
List of fragments that will be retained, i.e., any isomorphic fragment of any of these will be kept,...
boolean acceptUnsetToSingeBOApprox
Flag requesting to force-accepting the approximation that converts all unset bond orders to single bo...
boolean workingIn3D
Flag activating operations depending on 3D structure.
Map< String, File > mwSlotToAllFragsFile
Mapping of the molecular weight slot identifier to the file collecting all collected fragments belong...
Set< Map< String, Double > > getRejectedFormulaMoreThan()
void setMaxNoiseUnimodalPop(double maxNoiseUnimodalPop)
Sets the maximum noise of the population of normally distributed noise-distorted population used to d...
LinkedHashMap< String, String > getFormulae()
Set< String > rejectedElements
Symbols of elements that lead to rejection of a fragment.
boolean saveClustersOfConformerToFile
Flag requesting to print clusters of fragments to file.
void setMaxFragHeavyAtomCount(int maxFragHeavyAtomCount)
int mwSlotSize
Molecular weight slot width for collecting fragments.
void setRejectWeirdIsotopes(boolean doRejectWeirdIsotopes)
String newIsomorphicFamilyID()
Produces a new unique identifier for a family of isomorphic fragments.
boolean embedRingsInTemplate()
List< CuttingRule > getCuttingRules()
double maxNoiseUnimodalPop
Maximum amount of absolute noise used to generate normally distributed noise-distorted population of ...
boolean checkFormula
Flag requesting the execution of elemental analysis and comparison of the content of the structure fi...
Map< String, String > fragRejectionSMARTS
SMARTS leading to rejection of a fragment.
void setFormulaeFile(String formulaeFile)
Sets the pathname of the file containing molecular formula with a format respecting Cambridge Structu...
File getMWSlotFileNameUnqFrags(String mwSlotId)
Builds the pathname of the file meant to hold unique fragments from within a given MW slot,...
Map< String, String > getFragRejectionSMARTS()
ContractLevel getEmbeddedRingsContract()
String getStructuresFile()
boolean useCentroidsAsRepresentativeConformer
Flag requesting to save cluster centroids rather than the actual fragments that are closest to the ce...
boolean isUseCentroidsAsRepresentativeConformer()
Set< String[]> getRejectedAPClassCombinations()
void setCuttingRulesFilePathname(String pathname)
Assigns the pathname to the cutting rules file.
boolean addExplicitH
Flag requesting to add explicit H atoms.
void setEmbedRingsInTemplate(boolean embedRingsInTemplate)
Set< String > getRejectedAPClasses()
int maxFragHeavyAtomCount
Upper limit for number of non-H atoms in fragments.
int numParallelTasks
Number of parallel tasks to run.
Map< String, Double > formulaCriteriaLessThan
Lower limits of formula-based criteria for fragment rejection.
Map< String, String > fragRetentionSMARTS
SMARTS leading to retention of a fragment.
int sizeUnimodalPop
Size of on-the-fly generated, normally distributed noise-distorted population of geometries used to d...
void setRejectedAPClasses(Set< String > rejectedAPClasses)
boolean isStandaloneFragmentClustering
Flag requesting to run fragment clusterer in stand-alone fashion.
boolean acceptUnsetToSingeBO()
boolean doRejectWeirdIsotopes()
ScaffoldingPolicy scaffoldingPolicy
The policy for defining the scaffold vertex in a graph that does not have such a BBType.
boolean isStandaloneFragmentClustering()
void setScaffoldingPolicy(ScaffoldingPolicy sp)
String ignorableFragmentsFile
Pathname to file with fragments that can be ignored.
FragmenterParameters()
Constructor.
ArrayList< Vertex > getTargetFragments()
boolean preFilter
Flag requesting the pre-fragmentation filtering of the structures.
boolean doManageIsomorphicFamilies()
ArrayList< Vertex > ignorableFragments
List of fragments that can be rejected.
void setRejectedElements(Set< String > rejectedElements)
void setIsomorphicSampleSize(int isomorphicSampleSize)
void processParameters()
Processes all parameters and initialize related objects.
void createWorkingDirectory()
List< DGraph > fragmentationTmpls
List of graphs to be used as templates for fragmentation.
LinkedHashMap< String, String > formulae
Molecular formula read-in from CSD file.
void setMWSlotSize(int mwSlotSize)
boolean embedRingsInTemplate
Flag that enables the embedding of rings in templates upon conversion of molecules into DGraph.
int isomorphicSampleSize
Size of the sample of isomorphic fragments to collect.
Map< String, Integer > isomorphsCount
Counts of isomorphic versions of each known fragment generated in a fragmentation process.
void setMWSlotToAllFragsFile(Map< String, File > mwSlotToAllFragsFile)
static final int MAXISOMORPHICSAMPLESIZE
Maximum isomorphic sample size.
void setWorkingIn3D(boolean workingIn3D)
Sets boolean variable workingIn3D.
Map< String, Integer > getIsomorphsCount()
String targetFragmentsFile
Pathname to file with fragments that will be retained, i.e., any isomorphic fragment of any of these ...
Set< String[]> rejectedAPClassCombinations
Combination of strings matching the beginning of APClass names that lead to rejection of a fragment.
Specifies how to define the scaffold vertex of a graph.
Enum specifying to what extent the template's inner graph can be changed.
FREE
Inner graphs are free to change within the confines of the required AttachmentPoints.
The type of building block.
Identifier of the type of parameters.
FRG_PARAMS
Parameters controlling the fragmenter.