19package denoptim.fragmenter;
22import java.io.FileFilter;
23import java.io.FileNotFoundException;
24import java.io.IOException;
25import java.util.ArrayList;
26import java.util.Arrays;
27import java.util.Comparator;
28import java.util.LinkedHashMap;
30import java.util.logging.Level;
31import java.util.stream.Collectors;
33import org.apache.commons.io.FileUtils;
34import org.openscience.cdk.exception.CDKException;
35import org.openscience.cdk.interfaces.IAtomContainer;
36import org.openscience.cdk.interfaces.IBond;
37import org.openscience.cdk.io.iterator.IteratingSMILESReader;
39import denoptim.constants.DENOPTIMConstants;
40import denoptim.exception.DENOPTIMException;
41import denoptim.files.FileFormat;
42import denoptim.io.DenoptimIO;
43import denoptim.io.IteratingAtomContainerReader;
44import denoptim.programs.RunTimeParameters.ParametersType;
45import denoptim.programs.fragmenter.FragmenterParameters;
46import denoptim.task.ParallelAsynchronousTaskExecutor;
47import denoptim.utils.MoleculeUtils;
92 }
catch (IOException | CDKException e1)
95 +
"'. " + e1.getMessage());
131 }
catch (SecurityException | IOException e)
133 throw new Error(
"Unable to start fragmentation thread.",e);
146 List<File> resultFiles =
new ArrayList<File>();
154 }
catch (Exception e)
156 throw new Error(
"Could not extract the most common conformer. "
157 + e.getMessage(), e);
159 for (String pathname : extractor.
getResults())
161 resultFiles.add(
new File(pathname));
171 .map(obj -> (String) obj)
172 .map(pathname ->
new File(pathname))
173 .collect(Collectors.toList());
178 resultFiles.add(
new File ((String)
results.get(0)));
183 if (resultFiles.size()==0)
204 switch (outputFormat)
209 FileUtils.copyFile(resultFiles.get(0), allFragsFile);
210 if (resultFiles.size()>1)
213 resultFiles.subList(1,resultFiles.size()));
215 }
catch (IOException e)
217 throw new Error(
"Unable to create new file '"
218 + allFragsFile +
"'",e);
225 FileUtils.copyFile(resultFiles.get(0), allFragsFile);
226 if (resultFiles.size()>1)
229 resultFiles.subList(1,resultFiles.size()));
231 }
catch (IOException e)
233 throw new Error(
"Unable to create new file '"
234 + allFragsFile +
"'",e);
241 throw new Error(
"NOT IMPLEMENTED YET!");
246 throw new Error(
"Unexpected format "
248 +
"for final collection of fragments");
252 +
"collected in file " + allFragsFile);
262 List<File> files = Arrays.stream(workDir.listFiles(
new FileFilter(){
264 public boolean accept(File pathname) {
265 if (pathname.getName().startsWith(
267 && pathname.getName().contains(
274 })).collect(Collectors.toList());
275 files.sort(
new Comparator<File>() {
278 public int compare(File o1, File o2)
281 String s1 = o1.getName().replace(
283 int i1 = Integer.valueOf(s1.substring(0,s1.indexOf(
"-")));
284 String s2 = o2.getName().replace(
286 int i2 = Integer.valueOf(s2.substring(0,s2.indexOf(
"-")));
287 return Integer.compare(i1, i2);
310 int maxBuffersSize = 50000;
324 boolean relyingOnListSize =
false;
325 List<ArrayList<IAtomContainer>> batches =
326 new ArrayList<ArrayList<IAtomContainer>>();
327 for (
int i=0; i<numBatches; i++)
329 batches.add(
new ArrayList<IAtomContainer>());
337 IAtomContainer mol = reader.
next();
350 batches.get(batchId).add(mol);
354 if (batchId >= numBatches)
358 if (buffersSize >= maxBuffersSize)
361 for (
int i=0; i<numBatches; i++)
369 throw new Error(
"Cannot write to '" + filename +
"'.");
371 batches.get(i).clear();
378 }
catch (IOException e1)
380 throw new Error(
"Could not close reader of SDF file '"
385 if (buffersSize < maxBuffersSize)
387 for (
int i=0; i<numBatches; i++)
395 throw new Error(
"Cannot write to '" + filename +
"'.");
397 batches.get(i).clear();
402 if (formulae!=
null && relyingOnListSize
403 && index != (formulae.size()-1))
405 throw new Error(
"Inconsistent number of formulae "
406 +
"(" + formulae.size() +
") "
407 +
"and structures ("+ index +
") when using the index "
408 +
"in the list of formulae as identifier. For your "
409 +
"sake this in not allowed.");
426 +
"structuresBatch-" + i +
".sdf";
438 LinkedHashMap<String,String> formulae)
440 boolean relyingOnListSize =
false;
442 List<String> formulaeList =
new ArrayList<String>(formulae.values());
444 String molName = mol.getTitle();
445 if (molName!=
null && !molName.isBlank())
447 if (formulae.containsKey(molName))
450 formulae.get(molName));
452 relyingOnListSize =
true;
453 if (index<formulae.size())
456 formulaeList.get(index));
458 throw new Error(
"There are not "
459 +
"enough formulae! Looking for "
460 +
"formula #"+ index +
" but there are "
461 +
"only " + formulae.size()
466 relyingOnListSize =
true;
467 if (index<formulae.size())
470 formulaeList.get(index));
472 throw new Error(
"There are not "
473 +
"enough formulae! Looking for "
474 +
"formula #"+ index +
" but there are "
475 +
"only " + formulae.size()
479 return relyingOnListSize;
General set of constants used in DENOPTIM.
static final Object FORMULASTR
Property name used to store molecular formula as string in an atom container.
static final String MWSLOTFRAGSUNQFILENANEEND
Final part of filename used to collect unique fragments in a certain molecular weight slot.
static final FileFormat TMPFRAGFILEFORMAT
Format for intermediate files used during fragmentation.
static final String MWSLOTFRAGSFILENAMEROOT
Initial part of filename used to collect fragments belonging to a certain molecular weight slot.
Task that performs the various steps in the process that prepares chemical structures to be chopped,...
static String getFragmentsFileName(FragmenterParameters settings, int i)
Builds the pathname of the structure file meant to hold fragments resulting from this task.
String getLogFilePathname()
static String getResultsFileName(FragmenterParameters settings)
Builds the pathname of the structure file meant to hold results that are not necessarily fragments.
Fragments a list of chemical systems by running parallel fragmentation tasks.
boolean doPreFlightOperations()
static List< File > getFilesCollectingIsomorphicFamilyChampions(File workDir)
void createAndSubmitTasks()
Implementations of this method must call the submitTask(Task, String) method to actually send the tas...
static String getStructureFileNameBatch(FragmenterParameters settings, int i)
Builds the pathname of the structure file generated for one of the parallel threads.
FragmenterParameters settings
All settings controlling the tasks executed by this class.
ParallelFragmentationAlgorithm(FragmenterParameters settings)
Constructor.
boolean doPostFlightOperations()
File[] structures
Collection of files with input.
static void splitInputForThreads(FragmenterParameters settings, IteratingAtomContainerReader reader)
Splits the input data (from FragmenterParameters) into batches suitable for parallel batch processing...
static boolean getFormulaForMol(IAtomContainer mol, int index, LinkedHashMap< String, String > formulae)
Takes the molecular formula from the given list of formulae and using the 'Title' property of the ind...
Utility methods for input/output.
static void writeSDFFile(String fileName, IAtomContainer mol)
Writes IAtomContainer to SDF file.
static void appendTxtFiles(File f1, List< File > files)
Appends the files in the given list to the first file.
An iterator that takes IAtomContainers from a file, possibly using an available iterating reader,...
Class<?> getIteratorType()
void close()
Close the memory-efficient iterator if any is open.
String getWorkDirectory()
Gets the pathname to the working directory.
Logger getLogger()
Gets the program-specific logger.
Parameters controlling execution of the fragmenter.
boolean doManageIsomorphicFamilies
boolean doFragmentation
Flag requesting the fragmentation of the structures.
boolean doExtactRepresentativeConformer
Flag signaling the request to analyze each isomorphic family to extract the most representative fragm...
LinkedHashMap< String, String > getFormulae()
String getStructuresFile()
void setWorkingIn3D(boolean workingIn3D)
Sets boolean variable workingIn3D.
Runs tasks parallel in asynchronous fashion.
void submitTask(Task task, String logFilePathname)
void run()
Run the parallelized task.
final List< Object > results
List of objects returned by completed tasks.