$darkmode
DENOPTIM
ConformerExtractorTask.java
Go to the documentation of this file.
1/*
2 * DENOPTIM
3 * Copyright (C) 2022 Marco Foscato <marco.foscato@uib.no>
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Affero General Public License as published
7 * by the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Affero General Public License for more details.
14 *
15 * You should have received a copy of the GNU Affero General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19package denoptim.fragmenter;
20
21import java.io.File;
22import java.io.FileInputStream;
23import java.io.FileNotFoundException;
24import java.io.IOException;
25import java.util.ArrayList;
26import java.util.Iterator;
27import java.util.List;
28import java.util.logging.FileHandler;
29import java.util.logging.Handler;
30import java.util.logging.Level;
31import java.util.logging.Logger;
32import java.util.logging.SimpleFormatter;
33
34import org.openscience.cdk.DefaultChemObjectBuilder;
35import org.openscience.cdk.interfaces.IAtomContainer;
36import org.openscience.cdk.io.iterator.IteratingSDFReader;
37
38import denoptim.constants.DENOPTIMConstants;
39import denoptim.exception.DENOPTIMException;
40import denoptim.files.FileFormat;
41import denoptim.graph.FragIsomorphNode;
42import denoptim.graph.Fragment;
43import denoptim.graph.Vertex;
44import denoptim.graph.Vertex.BBType;
45import denoptim.io.DenoptimIO;
46import denoptim.programs.fragmenter.FragmenterParameters;
47import denoptim.task.Task;
48import denoptim.utils.TaskUtils;
49
57public class ConformerExtractorTask extends Task
58{
62 private String isomorphicFamilyId = null;
63
68 private File isoFamMembersFile;
69
73 private List<ClusterableFragment> sample;
74
78 protected String results = null;
79
84
88 private Logger logger = null;
89
93 private String logFilePathname = "unset";
94
95//------------------------------------------------------------------------------
96
106 public ConformerExtractorTask(List<Vertex> isomorphicFamily,
107 FragmenterParameters settings) throws SecurityException, IOException
108 {
110 if (isomorphicFamily.size()==0)
111 {
112 throw new Error("Attempt to create a "
113 + this.getClass().getSimpleName() + " from empty list of "
114 + "fragments.");
115 }
116 this.isomorphicFamilyId = "undefinedIsoFamID";
117 this.settings = settings;
118 this.logger = settings.getLogger();
119
120 List<ClusterableFragment> sample = new ArrayList<ClusterableFragment>();
121 for (int i=0; i<isomorphicFamily.size(); i++)
122 {
123 Fragment frag = (Fragment) isomorphicFamily.get(i);
125 i + " ");
126 }
127 this.sample = sample;
128 }
129
130//------------------------------------------------------------------------------
131
143 public ConformerExtractorTask(Vertex oldChampions,
144 FragmenterParameters settings) throws SecurityException, IOException
145 {
147 Object isoFamIdObj = oldChampions.getProperty(
149 if (isoFamIdObj==null)
150 {
151 throw new Error("Attempt to run analysis of isomorphic family for "
152 + "a fragment that does not declare the identity of its "
153 + "family. Missing '" + DENOPTIMConstants.ISOMORPHICFAMILYID
154 + "'.");
155 }
156 this.isomorphicFamilyId = isoFamIdObj.toString();
157 String taskAndFamId = this.getClass().getSimpleName() + "-" + id + "_"
159 this.settings = settings;
160
161 //Create the task-specific logger
162 this.logger = Logger.getLogger(taskAndFamId);
163 int n = logger.getHandlers().length;
164 for (int i=0; i<n; i++)
165 {
166 logger.removeHandler(logger.getHandlers()[0]);
167 }
168 this.logFilePathname = settings.getWorkDirectory() + DenoptimIO.FS
169 + taskAndFamId + ".log";
170 FileHandler fileHdlr = new FileHandler(logFilePathname);
171 SimpleFormatter formatterTxt = new SimpleFormatter();
172 fileHdlr.setFormatter(formatterTxt);
173 logger.setUseParentHandlers(false);
174 logger.addHandler(fileHdlr);
175 logger.setLevel(settings.getLogger().getLevel());
176 String header = "Started logging for " + taskAndFamId;
177 logger.log(Level.INFO,header);
178
179 String mwSlotID = FragmenterTools.getMWSlotIdentifier(oldChampions,
181 this.isoFamMembersFile = settings.getMWSlotFileNameAllFrags(mwSlotID);
182 if (!this.isoFamMembersFile.exists())
183 {
184 throw new Error("Expected file '"
185 + isoFamMembersFile.getAbsolutePath() + "' not found!");
186 }
187 }
188
189//------------------------------------------------------------------------------
190
/**
 * Returns the pathname of the log file associated with this task.
 * The underlying field starts out as the placeholder string "unset" and is
 * only replaced when a constructor sets up a task-specific log file
 * (work directory + file separator + task/family identifier + ".log").
 *
 * @return the current value of {@code logFilePathname}.
 */
194 public String getLogFilePathname()
195 {
196 return logFilePathname;
197 }
198
199//------------------------------------------------------------------------------
200
201 @Override
202 public Object call() throws Exception
203 {
204 if (isoFamMembersFile!=null)
206
208 logger);
209 clusterer.cluster();
210
211 List<Fragment> representativeFragments = null;
212 String pathname = "";
214 {
215 representativeFragments = clusterer.getClusterCentroids();
217 } else {
218 representativeFragments = clusterer.getNearestToClusterCentroids();
220 }
222 {
224 }
225
226 List<Vertex> representativeVertexes = new ArrayList<Vertex>();
227 representativeVertexes.addAll(representativeFragments);
229 representativeVertexes);
230 results = pathname;
231
233 {
234 List<List<Fragment>> clusters = clusterer.getTransformedClusters();
235 for (int iCluster=0; iCluster<clusters.size(); iCluster++)
236 {
237 List<Vertex> clusterMembers = new ArrayList<Vertex>();
238 clusterMembers.addAll(clusters.get(iCluster));
240 settings, isomorphicFamilyId, iCluster)),
241 FileFormat.VRTXSDF, clusterMembers);
242 }
243 }
244
245 // Final message
246 logger.log(Level.INFO,"Analysis of isomorphic family completed.");
247
248 // We stop the logger's file handler to remove the lock file.
249 for (Handler h : logger.getHandlers())
250 {
251 if (h instanceof FileHandler) {
252 logger.removeHandler(h);
253 h.close();
254 }
255 }
256
257 completed = true;
258 return results;
259 }
260
261//------------------------------------------------------------------------------
262
271 private List<ClusterableFragment> collectClusterableFragmentsFromFile()
272 throws DENOPTIMException
273 {
274 IteratingSDFReader reader;
275 List<ClusterableFragment> result;
276 try
277 {
278 reader = new IteratingSDFReader(
279 new FileInputStream(isoFamMembersFile),
280 DefaultChemObjectBuilder.getInstance());
281 } catch (FileNotFoundException e1)
282 {
283 // Should not happen: the file's existence was checked earlier, but it
284 // might have been removed after that check.
285 throw new Error("File '" + isoFamMembersFile + "' can "
286 + "not be found anymore.");
287 }
288 try
289 {
290 result = extractClusterableFragments(reader,
292 } finally {
293 try
294 {
295 reader.close();
296 } catch (IOException e)
297 {
298 throw new DENOPTIMException("Couls not close reader on file '"
299 + isoFamMembersFile + "'.",e);
300 }
301 }
302 return result;
303 }
304
305//------------------------------------------------------------------------------
306
318 public static List<ClusterableFragment> extractClusterableFragments(
319 Iterator<IAtomContainer> reader, String isomorphicFamilyId,
320 Logger logger)
321 {
322 List<ClusterableFragment> sample = new ArrayList<ClusterableFragment>();
323 int molId = -1;
324 while (reader.hasNext())
325 {
327 {
328 if (logger !=null)
329 logger.log(Level.INFO,"Sample reached the maximum size: "
330 + sample.size()
331 + ". Ignoring any further fragment.");
332 break;
333 }
334
335 // Read in the next fragment
336 molId++;
337 IAtomContainer mol = reader.next();
338 Object prop = mol.getProperty(DENOPTIMConstants.ISOMORPHICFAMILYID);
339 if (prop==null)
340 {
341 continue;
342 }
343
344 String molName = "";
345 if (mol.getTitle()!=null && !mol.getTitle().isBlank())
346 molName = "'" + mol.getTitle() + "' ";
347
348 Fragment frag = null;
349 if (isomorphicFamilyId.equals(prop.toString()))
350 {
351 try
352 {
353 if (logger !=null)
354 logger.log(Level.FINE,"Adding fragment " + molId
355 + " " + molName + "to the sample of isomorphic "
356 + "family.");
357 frag = new Fragment(mol,BBType.UNDEFINED);
358 } catch (DENOPTIMException e)
359 {
360 if (logger !=null)
361 logger.log(Level.WARNING, "Skipping fragment " + molId
362 + " " + molName + "because it could not "
363 + "be converted into a fragment.");
364 continue;
365 }
366 } else {
367 if (logger !=null)
368 logger.log(Level.FINE, "Skipping fragment " + molId
369 + " " + molName + "because it does not "
370 + "belong to isomorphic family '" + isomorphicFamilyId
371 + "'.");
372 continue;
373 }
374
376 molId + " " + molName);
377 }
378 if (logger !=null)
379 logger.log(Level.INFO, "Sample for " + isomorphicFamilyId
380 + " contains " + sample.size() + " fragments.");
381 return sample;
382 }
383
384//------------------------------------------------------------------------------
385
401 List<ClusterableFragment> sample, Fragment frag, Logger logger,
402 String fragId)
403 {
404 // The clusterable fragments are fragment with a consistent
405 // ordering of the atoms/APs list, so that such order can be used
406 // to calculate RMSD between fragments.
407 ClusterableFragment clusterable = new ClusterableFragment(frag);
408 if (sample.size()==0)
409 {
410 clusterable.setOrderOfNodes(
411 clusterable.getJGraphFragIsomorphism().vertexSet());
412 sample.add(clusterable);
413 } else {
415 try
416 {
417 fa = new FragmentAlignement(sample.get(0).getOriginalFragment(),
418 frag);
419 } catch (DENOPTIMException e)
420 {
421 if (logger !=null)
422 logger.log(Level.WARNING, "Skipping fragment " + fragId
423 + " because no "
424 + "isomorphism could be found with the first "
425 + "fragment in the sample.");
426 return false;
427 }
428
429 List<FragIsomorphNode> orderedNodes =
430 new ArrayList<FragIsomorphNode>();
431 for (FragIsomorphNode nOnFirst : sample.get(0).getOrderedNodes())
432 {
433 orderedNodes.add(
434 fa.getLowestRMSDMapping().getVertexCorrespondence(
435 nOnFirst, true));
436 }
437 clusterable.setOrderOfNodes(orderedNodes);
438 sample.add(clusterable);
439 }
440 return true;
441 }
442
443//------------------------------------------------------------------------------
444
453 String isomorphicFamilyId, int i)
454 {
455 return settings.getWorkDirectory() + DenoptimIO.FS
456 + isomorphicFamilyId + "_cluster-" + i + ".sdf";
457 }
458
459//------------------------------------------------------------------------------
460
468 String isomorphicFamilyId)
469 {
470 return settings.getWorkDirectory() + DenoptimIO.FS
471 + isomorphicFamilyId + "_centroids.sdf";
472 }
473
474//------------------------------------------------------------------------------
475
483 String isomorphicFamilyId)
484 {
485 return settings.getWorkDirectory() + DenoptimIO.FS
486 + isomorphicFamilyId + "_mostCentralFrags.sdf";
487 }
488
489//------------------------------------------------------------------------------
490
/**
 * Returns the result produced by this task, i.e., the content of the
 * {@code results} field. The field is initialized to {@code null} and, in
 * the code of this class, is assigned by {@code call()} with the pathname
 * under which the representative fragments are written.
 *
 * @return the value of {@code results}; {@code null} if it has not been
 * assigned yet.
 */
494 public String getResultFile()
495 {
496 return results;
497 }
498
499//------------------------------------------------------------------------------
500
501}
General set of constants used in DENOPTIM.
static final Object ISOMORPHICFAMILYID
Property used to store the identifier of the family of isomorphic fragments that owns a fragment.
Represents a fragment that can be clustered based on the 3*N coordinate of atoms and attachment point...
void setOrderOfNodes(Collection< FragIsomorphNode > c)
Sets the order of nodes (i.e., atoms/APs) to use for geometric comparison with other fragments,...
DefaultUndirectedGraph< FragIsomorphNode, FragIsomorphEdge > getJGraphFragIsomorphism()
Task that analyzes an isomorphic family of fragments to identify the most representative fragment (i....
String isomorphicFamilyId
Identifier of the isomorphic family this task deals with.
String logFilePathname
Pathname to thread-specific log.
static String getChosenFragPathname(FragmenterParameters settings, String isomorphicFamilyId)
Builds the pathname for the file where we save all the fragments that we found to be closest to each ...
static String getClusterPathname(FragmenterParameters settings, String isomorphicFamilyId, int i)
Builds the pathname for the file where we save the members of a given cluster.
ConformerExtractorTask(Vertex oldChampions, FragmenterParameters settings)
Constructs a task that will analyze the isomorphic family of the given fragment.
File isoFamMembersFile
File collecting (among others) the sampled members of the isomorphic family.
List< ClusterableFragment > sample
List of fragments defining an isomorphic family to analyse.
ConformerExtractorTask(List< Vertex > isomorphicFamily, FragmenterParameters settings)
Constructs a task that will analyze the given isomorphic family.
static boolean populateListOfClusterizableFragments(List< ClusterableFragment > sample, Fragment frag, Logger logger, String fragId)
Tries to add a fragment into a sample of isomorphic fragments.
List< ClusterableFragment > collectClusterableFragmentsFromFile()
Collects the clusterable fragments from the disk.
static String getClusterCentroidsPathname(FragmenterParameters settings, String isomorphicFamilyId)
Builds the pathname for the file where we save all the centroids of clusters.
String results
The data structure holding the results of this task.
FragmenterParameters settings
Settings for the calculation of the fitness.
static List< ClusterableFragment > extractClusterableFragments(Iterator< IAtomContainer > reader, String isomorphicFamilyId, Logger logger)
Analyzes all the entries provided by the iterator and extracts those that pertain to the specified isomo...
Class performing the alignment of Fragments.
GraphMapping< FragIsomorphNode, FragIsomorphEdge > getLowestRMSDMapping()
Returns the mapping leading to the lowest RMSD that could be found among all isomorphic mappings.
List< Fragment > getClusterCentroids()
Once the clustering is done, this method returns the list of cluster centroids.
void cluster()
Runs the clustering algorithm:
List< Fragment > getNearestToClusterCentroids()
Once the clustering is done, this method returns the list of fragments that are nearest to the respect...
List< List< Fragment > > getTransformedClusters()
Once the clustering is done, this method returns the list of clusters.
Task that performs the various steps in the process that prepares chemical structures to be chopped,...
static String getResultsFileName(FragmenterParameters settings)
Builds the pathname of the structure file meant to hold results that are not necessarily fragments.
static String getMWSlotIdentifier(Vertex frag, int slotSize)
Determines the name of the MW slot to use when comparing the given fragment with previously stored fr...
Class representing a continuously connected portion of chemical object holding attachment points.
Definition: Fragment.java:61
A vertex is a data structure that has an identity and holds a list of AttachmentPoints.
Definition: Vertex.java:61
Utility methods for input/output.
static File writeVertexesToFile(File file, FileFormat format, List< Vertex > vertexes)
Writes vertexes to file.
String getWorkDirectory()
Gets the pathname to the working directory.
Logger getLogger()
Gets the program-specific logger.
Parameters controlling execution of the fragmenter.
File getMWSlotFileNameAllFrags(String mwSlotId)
Builds the pathname of the file meant to hold all isomorphic fragments from a given MW slot.
boolean isStandaloneFragmentClustering
Flag requesting to run fragment clusterer in stand-alone fashion.
static final int MAXISOMORPHICSAMPLESIZE
Maximum isomorphic sample size.
A task that can throw exceptions.
Definition: Task.java:30
boolean completed
Flag about completion.
Definition: Task.java:41
Utilities for tasks.
Definition: TaskUtils.java:31
static synchronized int getUniqueTaskIndex()
Unique counter for tasks.
Definition: TaskUtils.java:41
File formats identified by DENOPTIM.
Definition: FileFormat.java:32
The type of building block.
Definition: Vertex.java:86