19package denoptim.programs.fragmenter;
22import java.lang.reflect.Field;
23import java.text.SimpleDateFormat;
24import java.util.ArrayList;
26import java.util.HashMap;
27import java.util.HashSet;
28import java.util.LinkedHashMap;
32import java.util.concurrent.atomic.AtomicInteger;
33import java.util.logging.Level;
35import denoptim.constants.DENOPTIMConstants;
36import denoptim.exception.DENOPTIMException;
37import denoptim.files.FileFormat;
38import denoptim.files.FileUtils;
39import denoptim.fragmenter.FragmentClusterer;
40import denoptim.fragmenter.ScaffoldingPolicy;
41import denoptim.graph.DGraph;
42import denoptim.graph.Template.ContractLevel;
43import denoptim.graph.Vertex;
44import denoptim.graph.Vertex.BBType;
45import denoptim.io.DenoptimIO;
46import denoptim.logging.StaticLogger;
47import denoptim.programs.RunTimeParameters;
48import denoptim.utils.FormulaUtils;
75 private LinkedHashMap<String, String>
formulae;
154 new HashMap<String,Double>();
162 new HashSet<Map<String,Double>>();
488 this.cutRulesFile = pathname;
699 Map<String, Double> formulaMax)
701 this.formulaCriteriaLessThan = formulaMax;
1002 this.scaffoldingPolicy = sp;
1028 switch (key.toUpperCase())
1034 case "STRUCTURESFILE=":
1038 case "FORMULATXTFILE=":
1043 case "PREFILTERSMARTS=":
1048 case "CUTTINGRULESFILE=":
1053 case "ADDEXPLICITHYDROGEN":
1057 case "UNSETTOSINGLEBO":
1061 case "IGNORABLEFRAGMENTS=":
1066 case "TARGETFRAGMENTS=":
1071 case "ISOMORPHICSAMPLESIZE=":
1074 }
catch (Throwable t)
1076 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1083 case "REMOVEDUPLICATES":
1090 }
catch (Throwable t)
1092 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1097 case "REJECTMINORISOTOPES":
1102 case "REJECTELEMENT=":
1107 case "REJFORMULALESSTHAN=":
1110 msg =
"Attempt to specify more than one criterion for "
1111 +
"rejecting fragments based on a lower-limit "
1112 +
"molecular formula. ";
1115 Map<String,Double> elSymbolsCount =
null;
1118 }
catch (Throwable t)
1120 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1127 case "REJFORMULAMORETHAN=":
1128 Map<String,Double> elSymbolsCount2=
null;
1131 }
catch (Throwable t)
1133 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1140 case "REJECTAPCLASS=":
1145 case "REJECTAPCLASSCOMBINATION=":
1146 String[] lst = value.split(
"\\s+");
1151 case "MAXFRAGSIZE=":
1154 }
catch (Throwable t)
1156 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1162 case "MINFRAGSIZE=":
1165 }
catch (Throwable t)
1167 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1173 case "REJECTSMARTS=":
1178 case "RETAINSMARTS=":
1183 case "CLUSTERIZEANDCOLLECT=":
1186 switch (value.trim().toUpperCase())
1198 + key +
": '" + value +
"'");
1202 case "SAVECLUSTERS":
1207 case "SIZEUNIMODALPOPULATION=":
1211 case "MAXNOISEUNIMODALPOPULATION=":
1215 case "SDWEIGHTUNIMODALPOPULATION=":
1219 case "SCAFFOLDINGPOLICY=":
1220 String[] words = value.split(
"\\s+");
1232 scaffoldingPolicy.label = words[1];
1234 }
catch (Throwable t)
1236 msg =
"Unable to parse value of " + key +
": '" + value +
"'";
1241 case "EMBEDRINGSINTEMPLATES=":
1247 case "RINGEMBEDDINGCONTRACT=":
1249 if (value.length() > 0)
1263 case "PARALLELTASKS=":
1270 msg =
"Unable to understand value " + key +
"'" + value +
"'";
1282 msg =
"Unable to understand value " + key +
"'" + value +
"'";
1288 msg =
"Keyword " + key +
" is not a known Fragmenter-"
1289 +
"related keyword. Check input files.";
1303 if (!
workDir.equals(System.getProperty(
"user.dir")))
1309 "ISOMORPHICSAMPLESIZE");
1349 }
catch (Throwable e)
1362 }
catch (Throwable e)
1378 +
"Output files associated with the current run are "
1388 String fileSep = System.getProperty(
"file.separator");
1389 boolean success =
false;
1392 SimpleDateFormat sdf =
new SimpleDateFormat(
"yyyyMMddkkmmss");
1393 String str =
"FRG" + sdf.format(
new Date());
1394 workDir = curDir + fileSep + str;
1411 StringBuilder sb =
new StringBuilder(1024);
1413 for (Field f : this.getClass().getDeclaredFields())
1417 sb.append(f.getName()).append(
" = ").append(
1418 f.get(
this)).append(
NL);
1423 +
" parameters. Cause: " + t);
1429 sb.append(otherCollector.getPrintedList());
1431 return sb.toString();
1571 this.useCentroidsAsRepresentativeConformer =
General set of constants used in DENOPTIM.
static final String MWSLOTFRAGSUNQFILENANEEND
Final part of filename used to collect unique fragments in a certain molecular weight slot.
static final String EOL
new line character
static final FileFormat TMPFRAGFILEFORMAT
Format for intermediate files used during fragmentation.
static final String MWSLOTFRAGSFILENAMEROOT
Initial part of filename used to collect fragments belonging to a certain molecular weight slot.
static final String MWSLOTFRAGSALLFILENANEEND
Final part of filename used to collect all samples fragments in a certain molecular weight slot inclu...
static boolean createDirectory(String fileName)
Creates a directory.
static void addToRecentFiles(String fileName, FileFormat ff)
Appends an entry to the list of recent files.
Utility methods for input/output.
static LinkedHashMap< String, String > readCSDFormulae(File file)
Read molecular formula from TXT data representation produced by Cambridge Structural Database tools (...
static void readCuttingRules(BufferedReader reader, List< CuttingRule > cutRules, String source)
Read cutting rules from a stream.
static ArrayList< Vertex > readVertexes(File file, Vertex.BBType bbt)
Reads Vertexes from any file that can contain such items.
static final String FS
File separator from system.
Logger class for DENOPTIM.
static final Logger appLogger
Collection of parameters controlling the behavior of the software.
Map< ParametersType, RunTimeParameters > otherParameters
Collection of other parameters by type.
boolean isMaster
Flag signaling this is the master collection of parameters.
String getWorkDirectory()
Gets the pathname to the working directory.
static boolean readYesNoTrueFalse(String s)
Reads a string searching for any common way to say either yes/true (including shorthand t/y) or no/fa...
void ensureFileExists(String pathname)
Ensures a pathname does lead to an existing file or triggers an error.
String paramTypeName()
Returns a string defining the type of the parameters collected here.
void ensureIsPositive(String paramName, int value, String paramKey)
Ensures that a parameter is a positive number (x>=0) or triggers an error.
void checkOtherParameters()
Checks any of the parameter collections contained in this instance.
String workDir
Working directory.
final String NL
New line character.
void processOtherParameters()
Processes any of the parameter collections contained in this instance.
int verbosity
Verbosity level for logger.
void ensureFileExistsIfSet(String pathname)
Ensures a pathname is not empty nor null and that it does lead to an existing file or triggers an err...
Parameters controlling execution of the fragmenter.
String structuresFile
Pathname to the file containing the structures of the molecules to fragment.
void setEmbeddedRingsContract(ContractLevel embeddedRingsContract)
double getLinearAngleLimit()
void setFragRejectionSMARTS(Map< String, String > fragRejectionSMARTS)
String getCuttingRulesFilePathname()
void setRejectedAPClassCombinations(Set< String[]> rejectedAPClassCombinations)
void setSaveClustersOfConformerToFile(boolean saveClustersOfConformerToFile)
Sets the flag requesting to print clusters of fragments to file upon extraction of the most represent...
void setCuttingRules(List< CuttingRule > cuttingRules)
Assigns the cutting rules loaded from the input.
boolean doFragmentation()
void setStructuresFile(String structuresFile)
Sets the pathname of the file containing input structures.
ArrayList< Vertex > getIgnorableFragments()
void setSizeUnimodalPop(int sizeUnimodalPop)
Sets the size of the population of normally distributed noise-distorted population used to define the...
double factorForSDOnStatsOfUnimodalPop
Factor used to multiply the standard deviation when adding it to the mean of the RMSD for a unimodal ...
void setCheckFormula(boolean checkFormula)
Sets the value of the flag controlling the execution of elemental analysis on the structures.
Set< String > preFilterSMARTS
SMARTS identifying substructures that lead to rejection of a structure before fragmentation.
void setMinFragHeavyAtomCount(int minFragHeavyAtomCount)
Map< String, File > getMWSlotToAllFragsFile()
Map< String, File > mwSlotToUnqFragsFile
Mapping of the molecular weight slot identifier to the file collecting unique fragments belonging to ...
void setFactorForSDOnStatsOfUnimodalPop(double factorForSDOnStatsOfUnimodalPop)
Sets the weight of the standard deviation when calculating the RMSD threshold from the statistics of ...
String cutRulesFile
Pathname to the file containing the cutting rules.
String formulaeFile
Pathname to the file containing the formulae of the molecules to fragment.
ContractLevel embeddedRingsContract
Type of constraint defined for any template generated upon conversion of molecules into DGraph.
void setRejectedFormulaMoreThan(Set< Map< String, Double > > formulaCriteriaMoreThan)
boolean doRejectWeirdIsotopes
Flag requesting to reject fragments with minor isotopes.
boolean isSaveClustersOfConformerToFile()
boolean doManageIsomorphicFamilies
boolean doExtactRepresentativeConformer()
void checkParameters()
Evaluate consistency of input parameters.
void interpretKeyword(String key, String value)
Processes a keyword/value pair and assign the related parameters.
void setFragRetentionSMARTS(Map< String, String > fragRetentionSMARTS)
final Object MANAGEMWSLOTSSLOCK
Synchronization lock for manipulating a) the collections (i.e., MW slots) of fragments produced by mu...
double getFactorForSDOnStatsOfUnimodalPop()
Map< String, Double > getRejectedFormulaLessThan()
boolean doAddDuOnLinearity
Flag requesting to add dummy atoms on linearities.
double linearAngleLimit
Upper limit for an angle before it is treated as "flat" ("linear") angle, i.e., close enough to 180 D...
int getMinFragHeavyAtomCount()
Map< String, File > getMWSlotToUnqFragsFile()
int minFragHeavyAtomCount
Lower limit for number of non-H atoms in fragments.
boolean doFiltering
Flag requesting to do post-fragmentation processing of fragments, i.e., application of all filtration...
void setUseCentroidsAsRepresentativeConformer(boolean useCentroidsAsRepresentativeConformer)
void setRejectedFormulaLessThan(Map< String, Double > formulaMax)
Set< Map< String, Double > > formulaCriteriaMoreThan
Upper limits of formula-based criteria for fragment rejection.
boolean doFragmentation
Flag requesting the fragmentation of the structures.
ScaffoldingPolicy getScaffoldingPolicy()
void setNumTasks(int numParallelTasks)
Sets the number of parallel tasks to run.
Set< String > getRejectedElements()
Set< String > getPreFiltrationSMARTS()
AtomicInteger unqIsomorphicFamilyId
Unique identifier of a family of isomorphic versions of a fragment.
void setLinearAngleLimit(double linearAngleLimit)
Sets the upper limit for an angle before it is treated as "flat" angle, i.e., close enough to 180 DEG...
boolean doAddDuOnLinearity()
List< CuttingRule > cuttingRules
List of cutting rules sorted by priority.
String getPrintedList()
Returns the list of parameters in a string with newline characters as delimiters.
File getMWSlotFileNameAllFrags(String mwSlotId)
Builds the pathname of the file meant to hold all isomorphic fragments from a given MW slot.
double getMaxNoiseUnimodalPop()
int getIsomorphicSampleSize()
Set< String > rejectedAPClasses
The initial part of APClasses that lead to rejection of a fragment.
Map< String, String > getFragRetentionSMARTS()
int getMaxFragHeavyAtomCount()
boolean doExtactRepresentativeConformer
Flag signaling the request to analyze each isomorphic family to extract the most representative fragm...
ArrayList< Vertex > targetFragments
List of fragments that will be retained, i.e., any isomorphic fragment of any of these will be kept,...
boolean acceptUnsetToSingeBOApprox
Flag requesting to force-accepting the approximation that converts all unset bond orders to single bo...
boolean workingIn3D
Flag activating operations depending on 3D structure.
Map< String, File > mwSlotToAllFragsFile
Mapping of the molecular weight slot identifier to the file collecting all collected fragments belong...
Set< Map< String, Double > > getRejectedFormulaMoreThan()
void setMaxNoiseUnimodalPop(double maxNoiseUnimodalPop)
Sets the maximum noise of the population of normally distributed noise-distorted population used to d...
LinkedHashMap< String, String > getFormulae()
Set< String > rejectedElements
Symbols of elements that lead to rejection of a fragment.
boolean saveClustersOfConformerToFile
Flag requesting to print clusters of fragments to file.
void setMaxFragHeavyAtomCount(int maxFragHeavyAtomCount)
int mwSlotSize
Molecular weight slot width for collecting fragments.
void setRejectWeirdIsotopes(boolean doRejectWeirdIsotopes)
String newIsomorphicFamilyID()
Produces a new unique identifier for a family of isomorphic fragments.
boolean embedRingsInTemplate()
List< CuttingRule > getCuttingRules()
double maxNoiseUnimodalPop
Maximum amount of absolute noise used to generate normally distributed noise-distorted population of ...
boolean checkFormula
Flag requesting the execution of elemental analysis and comparison of the content of the structure fi...
Map< String, String > fragRejectionSMARTS
SMARTS leading to rejection of a fragment.
void setFormulaeFile(String formulaeFile)
Sets the pathname of the file containing molecular formula with a format respecting Cambridge Structu...
File getMWSlotFileNameUnqFrags(String mwSlotId)
Builds the pathname of the file meant to hold unique fragments from within a given MW slot,...
Map< String, String > getFragRejectionSMARTS()
ContractLevel getEmbeddedRingsContract()
String getStructuresFile()
boolean useCentroidsAsRepresentativeConformer
Flag requesting to save cluster centroids rather than the actual fragments that are closest to the ce...
boolean isUseCentroidsAsRepresentativeConformer()
Set< String[]> getRejectedAPClassCombinations()
void setCuttingRulesFilePathname(String pathname)
Assigns the pathname to the cutting rules file.
boolean addExplicitH
Flag requesting to add explicit H atoms.
void setEmbedRingsInTemplate(boolean embedRingsInTemplate)
Set< String > getRejectedAPClasses()
int maxFragHeavyAtomCount
Upper limit for number of non-H atoms in fragments.
int numParallelTasks
Number of parallel tasks to run.
Map< String, Double > formulaCriteriaLessThan
Lower limits of formula-based criteria for fragment rejection.
Map< String, String > fragRetentionSMARTS
SMARTS leading to retention of a fragment.
int sizeUnimodalPop
Size of on-the-fly generated, normally distributed noise-distorted population of geometries used to d...
void setRejectedAPClasses(Set< String > rejectedAPClasses)
boolean isStandaloneFragmentClustering
Flag requesting to run fragment clusterer in stand-alone fashion.
boolean acceptUnsetToSingeBO()
boolean doRejectWeirdIsotopes()
ScaffoldingPolicy scaffoldingPolicy
The policy for defining the scaffold vertex in a graph that does not have such a BBType.
boolean isStandaloneFragmentClustering()
void setScaffoldingPolicy(ScaffoldingPolicy sp)
String ignorableFragmentsFile
Pathname to file with fragments that can be ignored.
FragmenterParameters()
Constructor.
ArrayList< Vertex > getTargetFragments()
boolean preFilter
Flag requesting the pre-fragmentation filtering of the structures.
boolean doManageIsomorphicFamilies()
ArrayList< Vertex > ignorableFragments
List of fragments that can be rejected.
void setRejectedElements(Set< String > rejectedElements)
void setIsomorphicSampleSize(int isomorphicSampleSize)
void processParameters()
Processes all parameters and initialize related objects.
void createWorkingDirectory()
LinkedHashMap< String, String > formulae
Molecular formula read-in from CSD file.
void setMWSlotSize(int mwSlotSize)
boolean embedRingsInTemplate
Flag that enables the embedding of rings in templates upon conversion of molecules into DGraph.
int isomorphicSampleSize
Size of the sample of isomorphic fragments to collect.
Map< String, Integer > isomorphsCount
Counts of isomorphic versions of each known fragment generated in a fragmentation process.
void setMWSlotToAllFragsFile(Map< String, File > mwSlotToAllFragsFile)
static final int MAXISOMORPHICSAMPLESIZE
Maximum isomorphic sample size.
void setWorkingIn3D(boolean workingIn3D)
Sets boolean variable workingIn3D.
Map< String, Integer > getIsomorphsCount()
String targetFragmentsFile
Pathname to file with fragments that will be retained, i.e., any isomorphic fragment of any of these ...
Set< String[]> rejectedAPClassCombinations
Combination of strings matching the beginning of APClass names that lead to rejection of a fragment.
Defines how to define the scaffold vertex of a graph.
Enum specifying to what extent the template's inner graph can be changed.
FREE
Inner graphs are free to change within the confines of the required AttachmentPoints.
The type of building block.
Identifier of the type of parameters.
FRG_PARAMS
Parameters controlling the fragmenter.