19package denoptim.programs.fragmenter;
22import java.lang.reflect.Field;
23import java.text.SimpleDateFormat;
24import java.util.ArrayList;
26import java.util.HashMap;
27import java.util.HashSet;
28import java.util.LinkedHashMap;
32import java.util.concurrent.atomic.AtomicInteger;
33import java.util.logging.Level;
35import denoptim.constants.DENOPTIMConstants;
36import denoptim.exception.DENOPTIMException;
37import denoptim.files.FileFormat;
38import denoptim.files.FileUtils;
39import denoptim.fragmenter.FragmentClusterer;
40import denoptim.fragmenter.ScaffoldingPolicy;
41import denoptim.graph.DGraph;
42import denoptim.graph.Template.ContractLevel;
43import denoptim.graph.Vertex;
44import denoptim.graph.Vertex.BBType;
45import denoptim.io.DenoptimIO;
46import denoptim.logging.StaticLogger;
47import denoptim.programs.RunTimeParameters;
48import denoptim.utils.FormulaUtils;
75 private LinkedHashMap<String, String>
formulae;
154 new HashMap<String,Double>();
162 new HashSet<Map<String,Double>>();
488 this.cutRulesFile = pathname;
699 Map<String, Double> formulaMax)
701 this.formulaCriteriaLessThan = formulaMax;
1013 this.scaffoldingPolicy = sp;
1039 switch (key.toUpperCase())
1045 case "STRUCTURESFILE=":
1049 case "FORMULATXTFILE=":
1054 case "PREFILTERSMARTS=":
1059 case "CUTTINGRULESFILE=":
1064 case "ADDEXPLICITHYDROGEN":
1068 case "UNSETTOSINGLEBO":
1072 case "IGNORABLEFRAGMENTS=":
1077 case "TARGETFRAGMENTS=":
1082 case "ISOMORPHICSAMPLESIZE=":
1085 }
catch (Throwable t)
1087 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1094 case "REMOVEDUPLICATES":
1101 }
catch (Throwable t)
1103 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1108 case "REJECTMINORISOTOPES":
1113 case "REJECTELEMENT=":
1118 case "REJFORMULALESSTHAN=":
1121 msg =
"Attempt to specify more than one criterion for "
1122 +
"rejecting fragments based on a lower-limit "
1123 +
"molecular formula. ";
1126 Map<String,Double> elSymbolsCount =
null;
1129 }
catch (Throwable t)
1131 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1138 case "REJFORMULAMORETHAN=":
1139 Map<String,Double> elSymbolsCount2=
null;
1142 }
catch (Throwable t)
1144 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1151 case "REJECTAPCLASS=":
1156 case "REJECTAPCLASSCOMBINATION=":
1157 String[] lst = value.split(
"\\s+");
1162 case "MAXFRAGSIZE=":
1165 }
catch (Throwable t)
1167 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1173 case "MINFRAGSIZE=":
1176 }
catch (Throwable t)
1178 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1184 case "REJECTSMARTS=":
1189 case "RETAINSMARTS=":
1194 case "CLUSTERIZEANDCOLLECT=":
1197 switch (value.trim().toUpperCase())
1209 + key +
": '" + value +
"'");
1213 case "SAVECLUSTERS":
1218 case "SIZEUNIMODALPOPULATION=":
1222 case "MAXNOISEUNIMODALPOPULATION=":
1226 case "SDWEIGHTUNIMODALPOPULATION=":
1230 case "SCAFFOLDINGPOLICY=":
1231 String[] words = value.split(
"\\s+");
1234 words[0].toUpperCase());
1244 scaffoldingPolicy.label = words[1];
1246 }
catch (Throwable t)
1248 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1253 case "EMBEDRINGSINTEMPLATES=":
1259 case "RINGEMBEDDINGCONTRACT=":
1261 if (value.length() > 0)
1275 case "PARALLELTASKS=":
1282 msg =
"Unable to understand value " + key +
"'" + value +
"'";
1294 msg =
"Unable to understand value " + key +
"'" + value +
"'";
1300 msg =
"Keyword " + key +
" is not a known Fragmenter-"
1301 +
"related keyword. Check input files.";
1315 if (!
workDir.equals(System.getProperty(
"user.dir")))
1321 "ISOMORPHICSAMPLESIZE");
1361 }
catch (Throwable e)
1374 }
catch (Throwable e)
1390 +
"Output files associated with the current run are "
1400 String fileSep = System.getProperty(
"file.separator");
1401 boolean success =
false;
1404 SimpleDateFormat sdf =
new SimpleDateFormat(
"yyyyMMddkkmmss");
1405 String str =
"FRG" + sdf.format(
new Date());
1406 workDir = curDir + fileSep + str;
1423 StringBuilder sb =
new StringBuilder(1024);
1425 for (Field f : this.getClass().getDeclaredFields())
1429 sb.append(f.getName()).append(
" = ").append(
1430 f.get(
this)).append(
NL);
1435 +
" parameters. Cause: " + t);
1441 sb.append(otherCollector.getPrintedList());
1443 return sb.toString();
1583 this.useCentroidsAsRepresentativeConformer =
General set of constants used in DENOPTIM.
static final String MWSLOTFRAGSUNQFILENANEEND
Final part of filename used to collect unique fragments in a certain molecular weight slot.
static final String EOL
new line character
static final FileFormat TMPFRAGFILEFORMAT
Format for intermediate files used during fragmentation.
static final String MWSLOTFRAGSFILENAMEROOT
Initial part of filename used to collect fragments belonging to a certain molecular weight slot.
static final String MWSLOTFRAGSALLFILENANEEND
Final part of filename used to collect all samples fragments in a certain molecular weight slot inclu...
static boolean createDirectory(String fileName)
Creates a directory.
static void addToRecentFiles(String fileName, FileFormat ff)
Appends an entry to the list of recent files.
Utility methods for input/output.
static LinkedHashMap< String, String > readCSDFormulae(File file)
Read molecular formula from TXT data representation produced by Cambridge Structural Database tools (...
static void readCuttingRules(BufferedReader reader, List< CuttingRule > cutRules, String source)
Read cutting rules from a stream.
static ArrayList< Vertex > readVertexes(File file, Vertex.BBType bbt)
Reads Vertexes from any file that can contain such items.
static final String FS
File separator from system.
Logger class for DENOPTIM.
static final Logger appLogger
Collection of parameters controlling the behavior of the software.
Map< ParametersType, RunTimeParameters > otherParameters
Collection of other parameters by type.
boolean isMaster
Flag signaling this is the master collection of parameters.
String getWorkDirectory()
Gets the pathname to the working directory.
static boolean readYesNoTrueFalse(String s)
Reads a string searching for any common way to say either yes/true (including shorthand t/y) or no/fa...
void ensureFileExists(String pathname)
Ensures a pathname does lead to an existing file or triggers an error.
String paramTypeName()
Returns a string defining the type of the parameters collected here.
void ensureIsPositive(String paramName, int value, String paramKey)
Ensures that a parameter is a positive number (x>=0) or triggers an error.
void checkOtherParameters()
Checks any of the parameter collections contained in this instance.
String workDir
Working directory.
final String NL
New line character.
void processOtherParameters()
Processes any of the parameter collections contained in this instance.
int verbosity
Verbosity level for logger.
void ensureFileExistsIfSet(String pathname)
Ensures a pathname is not empty nor null and that it does lead to an existing file or triggers an err...
Parameters controlling execution of the fragmenter.
String structuresFile
Pathname to the file containing the structures of the molecules to fragment.
void setEmbeddedRingsContract(ContractLevel embeddedRingsContract)
double getLinearAngleLimit()
void setFragRejectionSMARTS(Map< String, String > fragRejectionSMARTS)
String getCuttingRulesFilePathname()
void setRejectedAPClassCombinations(Set< String[]> rejectedAPClassCombinations)
void setSaveClustersOfConformerToFile(boolean saveClustersOfConformerToFile)
Sets the flag requesting to print clusters of fragments to file upon extraction of the most represent...
void setCuttingRules(List< CuttingRule > cuttingRules)
Assigns the cutting rules loaded from the input.
boolean doFragmentation()
void setStructuresFile(String structuresFile)
Sets the pathname of the file containing input structures.
ArrayList< Vertex > getIgnorableFragments()
void setSizeUnimodalPop(int sizeUnimodalPop)
Sets the size of the population of normally distributed noise-distorted population used to define the...
double factorForSDOnStatsOfUnimodalPop
Factor used to multiply the standard deviation when adding it to the mean of the RMSD for a unimodal ...
void setCheckFormula(boolean checkFormula)
Sets the value of the flag controlling the execution of elemental analysis on the structures.
Set< String > preFilterSMARTS
SMARTS identifying substructures that lead to rejection of a structure before fragmentation.
void setMinFragHeavyAtomCount(int minFragHeavyAtomCount)
Map< String, File > getMWSlotToAllFragsFile()
Map< String, File > mwSlotToUnqFragsFile
Mapping of the molecular weight slot identifier to the file collecting unique fragments belonging to ...
void setFactorForSDOnStatsOfUnimodalPop(double factorForSDOnStatsOfUnimodalPop)
Sets the weight of the standard deviation when calculating the RMSD threshold from the statistics of ...
String cutRulesFile
Pathname to the file containing the cutting rules.
String formulaeFile
Pathname to the file containing the formulae of the molecules to fragment.
ContractLevel embeddedRingsContract
Type of constraint defined for any template generated upon conversion of molecules into DGraph.
void setRejectedFormulaMoreThan(Set< Map< String, Double > > formulaCriteriaMoreThan)
boolean doRejectWeirdIsotopes
Flag requesting to reject fragments with minor isotopes.
boolean isSaveClustersOfConformerToFile()
boolean doManageIsomorphicFamilies
boolean doExtactRepresentativeConformer()
void checkParameters()
Evaluate consistency of input parameters.
void interpretKeyword(String key, String value)
Processes a keyword/value pair and assign the related parameters.
void setFragRetentionSMARTS(Map< String, String > fragRetentionSMARTS)
final Object MANAGEMWSLOTSSLOCK
Synchronization lock for manipulating a) the collections (i.e., MW slots) of fragments produced by mu...
double getFactorForSDOnStatsOfUnimodalPop()
Map< String, Double > getRejectedFormulaLessThan()
boolean doAddDuOnLinearity
Flag requesting to add dummy atoms on linearities.
double linearAngleLimit
Upper limit for an angle before it is treated as "flat" ("linear") angle, i.e., close enough to 180 D...
int getMinFragHeavyAtomCount()
Map< String, File > getMWSlotToUnqFragsFile()
int minFragHeavyAtomCount
Lower limit for number of non-H atoms in fragments.
void setAddExplicitH(boolean addExplicitH)
Give true to add explicit H atoms on all atoms.
boolean doFiltering
Flag requesting to do post-fragmentation processing of fragments, i.e., application of all filtration...
void setUseCentroidsAsRepresentativeConformer(boolean useCentroidsAsRepresentativeConformer)
void setRejectedFormulaLessThan(Map< String, Double > formulaMax)
Set< Map< String, Double > > formulaCriteriaMoreThan
Upper limits of formula-based criteria for fragment rejection.
boolean doFragmentation
Flag requesting the fragmentation of the structures.
ScaffoldingPolicy getScaffoldingPolicy()
void setNumTasks(int numParallelTasks)
Sets the number of parallel tasks to run.
Set< String > getRejectedElements()
Set< String > getPreFiltrationSMARTS()
AtomicInteger unqIsomorphicFamilyId
Unique identifier of a family of isomorphic versions of a fragment.
void setLinearAngleLimit(double linearAngleLimit)
Sets the upper limit for an angle before it is treated as "flat" angle, i.e., close enough to 180 DEG...
boolean doAddDuOnLinearity()
List< CuttingRule > cuttingRules
List of cutting rules sorted by priority.
String getPrintedList()
Returns the list of parameters in a string with newline characters as delimiters.
File getMWSlotFileNameAllFrags(String mwSlotId)
Builds the pathname of the file meant to hold all isomorphic fragments from a given MW slot.
double getMaxNoiseUnimodalPop()
int getIsomorphicSampleSize()
Set< String > rejectedAPClasses
The initial part of APClasses that lead to rejection of a fragment.
Map< String, String > getFragRetentionSMARTS()
int getMaxFragHeavyAtomCount()
boolean doExtactRepresentativeConformer
Flag signaling the request to analyze each isomorphic family to extract the most representative fragm...
ArrayList< Vertex > targetFragments
List of fragments that will be retained, i.e., any isomorphic fragment of any of these will be kept,...
boolean acceptUnsetToSingeBOApprox
Flag requesting to force-accepting the approximation that converts all unset bond orders to single bo...
boolean workingIn3D
Flag activating operations depending on 3D structure.
Map< String, File > mwSlotToAllFragsFile
Mapping of the molecular weight slot identifier to the file collecting all collected fragments belong...
Set< Map< String, Double > > getRejectedFormulaMoreThan()
void setMaxNoiseUnimodalPop(double maxNoiseUnimodalPop)
Sets the maximum noise of the population of normally distributed noise-distorted population used to d...
LinkedHashMap< String, String > getFormulae()
Set< String > rejectedElements
Symbols of elements that lead to rejection of a fragment.
boolean saveClustersOfConformerToFile
Flag requesting to print clusters of fragments to file.
void setMaxFragHeavyAtomCount(int maxFragHeavyAtomCount)
int mwSlotSize
Molecular weight slot width for collecting fragments.
void setRejectWeirdIsotopes(boolean doRejectWeirdIsotopes)
String newIsomorphicFamilyID()
Produces a new unique identifier for a family of isomorphic fragments.
boolean embedRingsInTemplate()
List< CuttingRule > getCuttingRules()
double maxNoiseUnimodalPop
Maximum amount of absolute noise used to generate normally distributed noise-distorted population of ...
boolean checkFormula
Flag requesting the execution of elemental analysis and comparison of the content of the structure fi...
Map< String, String > fragRejectionSMARTS
SMARTS leading to rejection of a fragment.
void setFormulaeFile(String formulaeFile)
Sets the pathname of the file containing molecular formula with a format respecting Cambridge Structu...
File getMWSlotFileNameUnqFrags(String mwSlotId)
Builds the pathname of the file meant to hold unique fragments from within a given MW slot,...
Map< String, String > getFragRejectionSMARTS()
ContractLevel getEmbeddedRingsContract()
String getStructuresFile()
boolean useCentroidsAsRepresentativeConformer
Flag requesting to save cluster centroids rather than the actual fragments that are closest to the ce...
boolean isUseCentroidsAsRepresentativeConformer()
Set< String[]> getRejectedAPClassCombinations()
void setCuttingRulesFilePathname(String pathname)
Assigns the pathname to the cutting rules file.
boolean addExplicitH
Flag requesting to add explicit H atoms.
void setEmbedRingsInTemplate(boolean embedRingsInTemplate)
Set< String > getRejectedAPClasses()
int maxFragHeavyAtomCount
Upper limit for number of non-H atoms in fragments.
int numParallelTasks
Number of parallel tasks to run.
Map< String, Double > formulaCriteriaLessThan
Lower limits of formula-based criteria for fragment rejection.
Map< String, String > fragRetentionSMARTS
SMARTS leading to retention of a fragment.
int sizeUnimodalPop
Size of on-the-fly generated, normally distributed noise-distorted population of geometries used to d...
void setRejectedAPClasses(Set< String > rejectedAPClasses)
boolean isStandaloneFragmentClustering
Flag requesting to run fragment clusterer in stand-alone fashion.
boolean acceptUnsetToSingeBO()
boolean doRejectWeirdIsotopes()
ScaffoldingPolicy scaffoldingPolicy
The policy for defining the scaffold vertex in a graph that does not have such a BBType.
boolean isStandaloneFragmentClustering()
void setScaffoldingPolicy(ScaffoldingPolicy sp)
String ignorableFragmentsFile
Pathname to file with fragments that can be ignored.
FragmenterParameters()
Constructor.
ArrayList< Vertex > getTargetFragments()
boolean preFilter
Flag requesting the pre-fragmentation filtering of the structures.
boolean doManageIsomorphicFamilies()
ArrayList< Vertex > ignorableFragments
List of fragments that can be rejected.
void setRejectedElements(Set< String > rejectedElements)
void setIsomorphicSampleSize(int isomorphicSampleSize)
void processParameters()
Processes all parameters and initialize related objects.
void createWorkingDirectory()
LinkedHashMap< String, String > formulae
Molecular formula read-in from CSD file.
void setMWSlotSize(int mwSlotSize)
boolean embedRingsInTemplate
Flag that enables the embedding of rings in templates upon conversion of molecules into DGraph.
int isomorphicSampleSize
Size of the sample of isomorphic fragments to collect.
Map< String, Integer > isomorphsCount
Counts of isomorphic versions of each known fragment generated in a fragmentation process.
void setMWSlotToAllFragsFile(Map< String, File > mwSlotToAllFragsFile)
static final int MAXISOMORPHICSAMPLESIZE
Maximum isomorphic sample size.
void setWorkingIn3D(boolean workingIn3D)
Sets boolean variable workingIn3D.
Map< String, Integer > getIsomorphsCount()
String targetFragmentsFile
Pathname to file with fragments that will be retained, i.e., any isomorphic fragment of any of these ...
Set< String[]> rejectedAPClassCombinations
Combination of strings matching the beginning of APClass names that lead to rejection of a fragment.
Defines how to define the scaffold vertex of a graph.
Enum specifying to what extent the template's inner graph can be changed.
FREE
Inner graphs are free to change within the confines of the required AttachmentPoints.
The type of building block.
Identifier of the type of parameters.
FRG_PARAMS
Parameters controlling the fragmenter.