19package denoptim.programs.fragmenter;
22import java.lang.reflect.Field;
23import java.text.SimpleDateFormat;
24import java.util.ArrayList;
26import java.util.HashMap;
27import java.util.HashSet;
28import java.util.LinkedHashMap;
32import java.util.concurrent.atomic.AtomicInteger;
33import java.util.logging.Level;
35import denoptim.constants.DENOPTIMConstants;
36import denoptim.exception.DENOPTIMException;
37import denoptim.files.FileFormat;
38import denoptim.files.FileUtils;
39import denoptim.fragmenter.FragmentClusterer;
40import denoptim.fragmenter.ScaffoldingPolicy;
41import denoptim.graph.DGraph;
42import denoptim.graph.Template.ContractLevel;
43import denoptim.graph.Vertex;
44import denoptim.graph.Vertex.BBType;
45import denoptim.io.DenoptimIO;
46import denoptim.logging.StaticLogger;
47import denoptim.programs.RunTimeParameters;
48import denoptim.utils.FormulaUtils;
75 private LinkedHashMap<String, String>
formulae;
159 new HashMap<String,Double>();
167 new HashSet<Map<String,Double>>();
509 this.cutRulesFile = pathname;
730 Map<String, Double> formulaMax)
732 this.formulaCriteriaLessThan = formulaMax;
1044 this.scaffoldingPolicy = sp;
1093 switch (key.toUpperCase())
1099 case "STRUCTURESFILE=":
1103 case "FORMULATXTFILE=":
1108 case "PREFILTERSMARTS=":
1113 case "CUTTINGRULESFILE=":
1118 case "EXTRACTFROMGRAPHS=":
1123 case "ADDEXPLICITHYDROGEN":
1127 case "UNSETTOSINGLEBO":
1131 case "IGNORABLEFRAGMENTS=":
1136 case "TARGETFRAGMENTS=":
1141 case "ISOMORPHICSAMPLESIZE=":
1144 }
catch (Throwable t)
1146 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1153 case "REMOVEDUPLICATES":
1160 }
catch (Throwable t)
1162 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1167 case "REJECTMINORISOTOPES":
1172 case "REJECTELEMENT=":
1177 case "REJFORMULALESSTHAN=":
1180 msg =
"Attempt to specify more than one criterion for "
1181 +
"rejecting fragments based on a lower-limit "
1182 +
"molecular formula. ";
1185 Map<String,Double> elSymbolsCount =
null;
1188 }
catch (Throwable t)
1190 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1197 case "REJFORMULAMORETHAN=":
1198 Map<String,Double> elSymbolsCount2=
null;
1201 }
catch (Throwable t)
1203 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1210 case "REJECTAPCLASS=":
1215 case "REJECTAPCLASSCOMBINATION=":
1216 String[] lst = value.split(
"\\s+");
1221 case "MAXFRAGSIZE=":
1224 }
catch (Throwable t)
1226 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1232 case "MINFRAGSIZE=":
1235 }
catch (Throwable t)
1237 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1243 case "REJECTSMARTS=":
1248 case "RETAINSMARTS=":
1253 case "CLUSTERIZEANDCOLLECT=":
1256 switch (value.trim().toUpperCase())
1268 + key +
": '" + value +
"'");
1272 case "SAVECLUSTERS":
1277 case "SIZEUNIMODALPOPULATION=":
1281 case "MAXNOISEUNIMODALPOPULATION=":
1285 case "SDWEIGHTUNIMODALPOPULATION=":
1289 case "SCAFFOLDINGPOLICY=":
1290 String[] words = value.split(
"\\s+");
1293 words[0].toUpperCase());
1303 scaffoldingPolicy.label = words[1];
1305 }
catch (Throwable t)
1307 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1312 case "EMBEDRINGSINTEMPLATES=":
1318 case "RINGEMBEDDINGCONTRACT=":
1320 if (value.length() > 0)
1327 case "FRAGMENTATIONTEMPLATE=":
1334 case "BONDSAROUNDTOPOCRITICATOM=":
1346 case "PARALLELTASKS=":
1353 msg =
"Unable to understand value " + key +
"'" + value +
"'";
1365 msg =
"Unable to understand value " + key +
"'" + value +
"'";
1371 msg =
"Keyword " + key +
" is not a known Fragmenter-"
1372 +
"related keyword. Check input files.";
1386 if (!
workDir.equals(System.getProperty(
"user.dir")))
1392 "ISOMORPHICSAMPLESIZE");
1433 }
catch (Throwable e)
1446 }
catch (Throwable e)
1459 }
catch (Throwable e)
1475 +
"Output files associated with the current run are "
1485 String fileSep = System.getProperty(
"file.separator");
1486 boolean success =
false;
1489 SimpleDateFormat sdf =
new SimpleDateFormat(
"yyyyMMddkkmmss");
1490 String str =
"FRG" + sdf.format(
new Date());
1491 workDir = curDir + fileSep + str;
1508 StringBuilder sb =
new StringBuilder(1024);
1510 for (Field f : this.getClass().getDeclaredFields())
1514 sb.append(f.getName()).append(
" = ").append(
1515 f.get(
this)).append(
NL);
1520 +
" parameters. Cause: " + t);
1526 sb.append(otherCollector.getPrintedList());
1528 return sb.toString();
1668 this.useCentroidsAsRepresentativeConformer =
General set of constants used in DENOPTIM.
static final String MWSLOTFRAGSUNQFILENANEEND
Final part of filename used to collect unique fragments in a certain molecular weight slot.
static final String EOL
new line character
static final FileFormat TMPFRAGFILEFORMAT
Format for intermediate files used during fragmentation.
static final String MWSLOTFRAGSFILENAMEROOT
Initial part of filename used to collect fragments belonging to a certain molecular weight slot.
static final String MWSLOTFRAGSALLFILENANEEND
Final part of filename used to collect all samples fragments in a certain molecular weight slot inclu...
static boolean createDirectory(String fileName)
Creates a directory.
static void addToRecentFiles(String fileName, FileFormat ff)
Appends an entry to the list of recent files.
Utility methods for input/output.
static LinkedHashMap< String, String > readCSDFormulae(File file)
Read molecular formula from TXT data representation produced by Cambridge Structural Database tools (...
static void readCuttingRules(BufferedReader reader, List< CuttingRule > cutRules, String source)
Read cutting rules from a stream.
static ArrayList< DGraph > readDENOPTIMGraphsFromFile(File inFile)
Reads a list of DGraphs from file.
static ArrayList< Vertex > readVertexes(File file, Vertex.BBType bbt)
Reads Vertexes from any file that can contain such items.
static final String FS
File separator from system.
Logger class for DENOPTIM.
static final Logger appLogger
Collection of parameters controlling the behavior of the software.
Map< ParametersType, RunTimeParameters > otherParameters
Collection of other parameters by type.
boolean isMaster
Flag signaling this is the master collection of parameters.
String getWorkDirectory()
Gets the pathname to the working directory.
static boolean readYesNoTrueFalse(String s)
Reads a string searching for any common way to say either yes/true (including shorthand t/y) or no/fa...
void ensureFileExists(String pathname)
Ensures a pathname does lead to an existing file or triggers an error.
String paramTypeName()
Returns a string defining the type of the parameters collected here.
void ensureIsPositive(String paramName, int value, String paramKey)
Ensures that a parameter is a positive number (x>=0) or triggers an error.
void checkOtherParameters()
Checks any of the parameter collections contained in this instance.
String workDir
Working directory.
final String NL
New line character.
void processOtherParameters()
Processes any of the parameter collections contained in this instance.
int verbosity
Verbosity level for logger.
void ensureFileExistsIfSet(String pathname)
Ensures a pathname is not empty nor null and that it does lead to an existing file or triggers an err...
Parameters controlling execution of the fragmenter.
String structuresFile
Pathname to the file containing the structures of the molecules to fragment or the fragments to proce...
void setEmbeddedRingsContract(ContractLevel embeddedRingsContract)
double getLinearAngleLimit()
void setFragRejectionSMARTS(Map< String, String > fragRejectionSMARTS)
String getCuttingRulesFilePathname()
void setRejectedAPClassCombinations(Set< String[]> rejectedAPClassCombinations)
void setSaveClustersOfConformerToFile(boolean saveClustersOfConformerToFile)
Sets the flag requesting to print clusters of fragments to file upon extraction of the most represent...
void setCuttingRules(List< CuttingRule > cuttingRules)
Assigns the cutting rules loaded from the input.
boolean doFragmentation()
void setStructuresFile(String structuresFile)
Sets the pathname of the file containing input structures.
ArrayList< Vertex > getIgnorableFragments()
void setSizeUnimodalPop(int sizeUnimodalPop)
Sets the size of the population of normally distributed noise-distorted population used to define the...
void setFragmentationTmpls(List< DGraph > fragmentationTmpls)
Sets the list of graph templates for fragmentation.
double factorForSDOnStatsOfUnimodalPop
Factor used to multiply the standard deviation when adding it to the mean of the RMSD for a unimodal ...
void setCheckFormula(boolean checkFormula)
Sets the value of the flag controlling the execution of elemental analysis on the structures.
Set< String > preFilterSMARTS
SMARTS identifying substructures that lead to rejection of a structure before fragmentation.
void setMinFragHeavyAtomCount(int minFragHeavyAtomCount)
Map< String, File > getMWSlotToAllFragsFile()
Map< String, File > mwSlotToUnqFragsFile
Mapping of the molecular weight slot identifier to the file collecting unique fragments belonging to ...
void setFactorForSDOnStatsOfUnimodalPop(double factorForSDOnStatsOfUnimodalPop)
Sets the weight of the standard deviation when calculating the RMSD threshold from the statistics of ...
String cutRulesFile
Pathname to the file containing the cutting rules.
String formulaeFile
Pathname to the file containing the formulae of the molecules to fragment.
ContractLevel embeddedRingsContract
Type of constraint defined for any template generated upon conversion of molecules into DGraph.
void setRejectedFormulaMoreThan(Set< Map< String, Double > > formulaCriteriaMoreThan)
boolean doRejectWeirdIsotopes
Flag requesting to reject fragments with minor isotopes.
boolean isSaveClustersOfConformerToFile()
boolean doManageIsomorphicFamilies
int maxBufferShellSize
Maximum number of buffer shells to consider for fragmentation.
boolean doExtactRepresentativeConformer()
void checkParameters()
Evaluate consistency of input parameters.
void interpretKeyword(String key, String value)
Processes a keyword/value pair and assign the related parameters.
void setFragRetentionSMARTS(Map< String, String > fragRetentionSMARTS)
final Object MANAGEMWSLOTSSLOCK
Synchronization lock for manipulating a) the collections (i.e., MW slots) of fragments produced by mu...
double getFactorForSDOnStatsOfUnimodalPop()
List< DGraph > getFragmentationTmpls()
Map< String, Double > getRejectedFormulaLessThan()
boolean doAddDuOnLinearity
Flag requesting to add dummy atoms on linearities.
double linearAngleLimit
Upper limit for an angle before it is treated as "flat" ("linear") angle, i.e., close enough to 180 D...
int getMinFragHeavyAtomCount()
Map< String, File > getMWSlotToUnqFragsFile()
int minFragHeavyAtomCount
Lower limit for number of non-H atoms in fragments.
void setAddExplicitH(boolean addExplicitH)
Give true to add explicit H atoms on all atoms.
boolean doFiltering
Flag requesting to do post-fragmentation processing of fragments, i.e., application of all filtration...
void setUseCentroidsAsRepresentativeConformer(boolean useCentroidsAsRepresentativeConformer)
void setRejectedFormulaLessThan(Map< String, Double > formulaMax)
Set< Map< String, Double > > formulaCriteriaMoreThan
Upper limits of formula-based criteria for fragment rejection.
String fragmentationTmplFile
Pathname to file containing a graph to be used as template for fragmentation.
boolean doFragmentation
Flag requesting the fragmentation of the structures.
ScaffoldingPolicy getScaffoldingPolicy()
void setNumTasks(int numParallelTasks)
Sets the number of parallel tasks to run.
Set< String > getRejectedElements()
Set< String > getPreFiltrationSMARTS()
AtomicInteger unqIsomorphicFamilyId
Unique identifier of a family of isomorphic versions of a fragment.
void setLinearAngleLimit(double linearAngleLimit)
Sets the upper limit for an angle before it is treated as "flat" angle, i.e., close enough to 180 DEG...
boolean doAddDuOnLinearity()
List< CuttingRule > cuttingRules
List of cutting rules sorted by priority.
String getPrintedList()
Returns the list of parameters in a string with newline characters as delimiters.
File getMWSlotFileNameAllFrags(String mwSlotId)
Builds the pathname of the file meant to hold all isomorphic fragments from a given MW slot.
double getMaxNoiseUnimodalPop()
int getIsomorphicSampleSize()
Set< String > rejectedAPClasses
The initial part of APClasses that lead to rejection of a fragment.
Map< String, String > getFragRetentionSMARTS()
int getMaxFragHeavyAtomCount()
boolean doExtactRepresentativeConformer
Flag signaling the request to analyze each isomorphic family to extract the most representative fragm...
ArrayList< Vertex > targetFragments
List of fragments that will be retained, i.e., any isomorphic fragment of any of these will be kept,...
boolean acceptUnsetToSingeBOApprox
Flag requesting to force-accepting the approximation that converts all unset bond orders to single bo...
boolean workingIn3D
Flag activating operations depending on 3D structure.
Map< String, File > mwSlotToAllFragsFile
Mapping of the molecular weight slot identifier to the file collecting all collected fragments belong...
Set< Map< String, Double > > getRejectedFormulaMoreThan()
boolean doFragExtractionFromGraphs
Flag requesting to extract fragments from graphs.
void setMaxNoiseUnimodalPop(double maxNoiseUnimodalPop)
Sets the maximum noise of the population of normally distributed noise-distorted population used to d...
LinkedHashMap< String, String > getFormulae()
Set< String > rejectedElements
Symbols of elements that lead to rejection of a fragment.
boolean saveClustersOfConformerToFile
Flag requesting to print clusters of fragments to file.
void setMaxFragHeavyAtomCount(int maxFragHeavyAtomCount)
int mwSlotSize
Molecular weight slot width for collecting fragments.
void setRejectWeirdIsotopes(boolean doRejectWeirdIsotopes)
String newIsomorphicFamilyID()
Produces a new unique identifier for a family of isomorphic fragments.
boolean embedRingsInTemplate()
List< CuttingRule > getCuttingRules()
double maxNoiseUnimodalPop
Maximum amount of absolute noise used to generate normally distributed noise-distorted population of ...
boolean checkFormula
Flag requesting the execution of elemental analysis and comparison of the content of the structure fi...
Map< String, String > fragRejectionSMARTS
SMARTS leading to rejection of a fragment.
void setFormulaeFile(String formulaeFile)
Sets the pathname of the file containing molecular formula with a format respecting Cambridge Structu...
boolean doFragExtractionFromGraphs()
File getMWSlotFileNameUnqFrags(String mwSlotId)
Builds the pathname of the file meant to hold unique fragments from within a given MW slot,...
Map< String, String > getFragRejectionSMARTS()
ContractLevel getEmbeddedRingsContract()
String getStructuresFile()
boolean useCentroidsAsRepresentativeConformer
Flag requesting to save cluster centroids rather than the actual fragments that are closest to the ce...
boolean isUseCentroidsAsRepresentativeConformer()
Set< String[]> getRejectedAPClassCombinations()
void setCuttingRulesFilePathname(String pathname)
Assigns the pathname to the cutting rules file.
boolean addExplicitH
Flag requesting to add explicit H atoms.
void setEmbedRingsInTemplate(boolean embedRingsInTemplate)
Set< String > getRejectedAPClasses()
int maxFragHeavyAtomCount
Upper limit for number of non-H atoms in fragments.
int numParallelTasks
Number of parallel tasks to run.
Map< String, Double > formulaCriteriaLessThan
Lower limits of formula-based criteria for fragment rejection.
Map< String, String > fragRetentionSMARTS
SMARTS leading to retention of a fragment.
int sizeUnimodalPop
Size of on-the-fly generated, normally distributed noise-distorted population of geometries used to d...
void setRejectedAPClasses(Set< String > rejectedAPClasses)
boolean isStandaloneFragmentClustering
Flag requesting to run fragment clusterer in stand-alone fashion.
boolean acceptUnsetToSingeBO()
int getMaxBufferShellSize()
boolean doRejectWeirdIsotopes()
ScaffoldingPolicy scaffoldingPolicy
The policy for defining the scaffold vertex in a graph that does not have such a BBType.
boolean isStandaloneFragmentClustering()
void setScaffoldingPolicy(ScaffoldingPolicy sp)
String ignorableFragmentsFile
Pathname to file with fragments that can be ignored.
FragmenterParameters()
Constructor.
ArrayList< Vertex > getTargetFragments()
boolean preFilter
Flag requesting the pre-fragmentation filtering of the structures.
boolean doManageIsomorphicFamilies()
ArrayList< Vertex > ignorableFragments
List of fragments that can be rejected.
void setRejectedElements(Set< String > rejectedElements)
void setIsomorphicSampleSize(int isomorphicSampleSize)
void processParameters()
Processes all parameters and initialize related objects.
void createWorkingDirectory()
List< DGraph > fragmentationTmpls
List of graphs to be used as templates for fragmentation.
LinkedHashMap< String, String > formulae
Molecular formula read-in from CSD file.
void setMWSlotSize(int mwSlotSize)
boolean embedRingsInTemplate
Flag that enables the embedding of rings in templates upon conversion of molecules into DGraph.
int isomorphicSampleSize
Size of the sample of isomorphic fragments to collect.
Map< String, Integer > isomorphsCount
Counts of isomorphic versions of each known fragment generated in a fragmentation process.
void setMWSlotToAllFragsFile(Map< String, File > mwSlotToAllFragsFile)
static final int MAXISOMORPHICSAMPLESIZE
Maximum isomorphic sample size.
void setWorkingIn3D(boolean workingIn3D)
Sets boolean variable workingIn3D.
Map< String, Integer > getIsomorphsCount()
String targetFragmentsFile
Pathname to file with fragments that will be retained, i.e., any isomorphic fragment of any of these ...
Set< String[]> rejectedAPClassCombinations
Combination of strings matching the beginning of APClass names that lead to rejection of a fragment.
Specifies how the scaffold vertex of a graph is defined.
Enum specifying to what extent the template's inner graph can be changed.
FREE
Inner graphs are free to change within the confines of the required AttachmentPoints.
The type of building block.
Identifier of the type of parameters.
FRG_PARAMS
Parameters controlling the fragmenter.