$darkmode
DENOPTIM
FragmenterTools.java
Go to the documentation of this file.
1package denoptim.fragmenter;
2
3import java.io.File;
4import java.io.FileInputStream;
5import java.io.IOException;
6import java.util.ArrayList;
7import java.util.HashMap;
8import java.util.HashSet;
9import java.util.List;
10import java.util.Map;
11import java.util.Set;
12import java.util.logging.Level;
13import java.util.logging.Logger;
14
15import javax.vecmath.Point3d;
16import javax.vecmath.Vector3d;
17
18import org.openscience.cdk.Atom;
19import org.openscience.cdk.Bond;
20import org.openscience.cdk.DefaultChemObjectBuilder;
21import org.openscience.cdk.PseudoAtom;
22import org.openscience.cdk.config.Isotopes;
23import org.openscience.cdk.exception.CDKException;
24import org.openscience.cdk.geometry.alignment.KabschAlignment;
25import org.openscience.cdk.interfaces.IAtom;
26import org.openscience.cdk.interfaces.IAtomContainer;
27import org.openscience.cdk.interfaces.IBond;
28import org.openscience.cdk.interfaces.IIsotope;
29import org.openscience.cdk.io.iterator.IteratingSDFReader;
30import org.openscience.cdk.isomorphism.Mappings;
31import org.openscience.cdk.isomorphism.Pattern;
32import org.openscience.cdk.silent.SilentChemObjectBuilder;
33import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
34
35import denoptim.constants.DENOPTIMConstants;
36import denoptim.exception.DENOPTIMException;
37import denoptim.files.FileFormat;
38import denoptim.files.UndetectedFileFormatException;
39import denoptim.graph.APClass;
40import denoptim.graph.AttachmentPoint;
41import denoptim.graph.DGraph;
42import denoptim.graph.Edge;
43import denoptim.graph.Fragment;
44import denoptim.graph.Template;
45import denoptim.graph.Vertex;
46import denoptim.graph.Vertex.BBType;
47import denoptim.graph.rings.RingClosingAttractor;
48import denoptim.io.DenoptimIO;
49import denoptim.io.IteratingAtomContainerReader;
50import denoptim.molecularmodeling.ThreeDimTreeBuilder;
51import denoptim.programs.RunTimeParameters.ParametersType;
52import denoptim.programs.fragmenter.CuttingRule;
53import denoptim.programs.fragmenter.FragmenterParameters;
54import denoptim.programs.fragmenter.MatchedBond;
55import denoptim.utils.CartesianSpaceUtils;
56import denoptim.utils.DummyAtomHandler;
57import denoptim.utils.FormulaUtils;
58import denoptim.utils.ManySMARTSQuery;
59import denoptim.utils.MathUtils;
60import denoptim.utils.MoleculeUtils;
61import denoptim.utils.Randomizer;
62
63public class FragmenterTools
64{
65
66//------------------------------------------------------------------------------
67
82 public static void checkElementalAnalysisAgainstFormula(File input,
83 File output, Logger logger)
84 throws DENOPTIMException, IOException
85 {
86 FileInputStream fis = new FileInputStream(input);
87 IteratingSDFReader reader = new IteratingSDFReader(fis,
88 DefaultChemObjectBuilder.getInstance());
89
90 int index = -1;
91 int maxBufferSize = 2000;
92 ArrayList<IAtomContainer> buffer = new ArrayList<IAtomContainer>(500);
93 try {
94 while (reader.hasNext())
95 {
96 index++;
97 if (logger!=null)
98 {
99 logger.log(Level.FINE,"Checking elemental analysis of "
100 + "structure " + index);
101 }
102 IAtomContainer mol = reader.next();
103 if (mol.getProperty(DENOPTIMConstants.FORMULASTR)==null)
104 {
105 throw new Error("Property '" + DENOPTIMConstants.FORMULASTR
106 + "' not found in molecule " + index + " in file "
107 + input + ". Cannot compare formula with elemental"
108 + "analysis.");
109 }
110 String formula = mol.getProperty(DENOPTIMConstants.FORMULASTR)
111 .toString();
112
114 mol, logger))
115 {
116 buffer.add(mol);
117 } else {
118 if (logger!=null)
119 {
120 logger.log(Level.INFO,"Inconsistency between elemental "
121 + "analysis of structure and molecular formula."
122 + " Rejecting structure " + index + ": "
123 + mol.getTitle());
124 }
125 }
126
127 // If max buffer size is reached, then bump to file
128 if (buffer.size() >= maxBufferSize)
129 {
130 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer,
131 true);
132 buffer.clear();
133 }
134 }
135 }
136 finally {
137 reader.close();
138 }
139 if (buffer.size() < maxBufferSize)
140 {
141 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer, true);
142 buffer.clear();
143 }
144 }
145
146//------------------------------------------------------------------------------
147
148
162 public static boolean prepareMolToFragmentation(IAtomContainer mol,
163 FragmenterParameters settings, int index)
164 {
165 try
166 {
167 if (settings.addExplicitH())
168 {
170 } else {
172 }
174 } catch (CDKException e)
175 {
176 if (e.getMessage().contains("Cannot assign Kekulé structure"))
177 {
178 if (!settings.acceptUnsetToSingeBO())
179 {
180 settings.getLogger().log(Level.WARNING,"Some bond order "
181 + "are unset and attempt to kekulize the "
182 + "system has failed "
183 + "for structure " + index + ". "
184 + "This hampers use of SMARTS queries, which "
185 + "may very well "
186 + "not work as expected. Structure " + index
187 + " will be rejected. "
188 + "You can avoid rejection by using "
189 + "keyword "
190 + ParametersType.FRG_PARAMS.getKeywordRoot()
191 + "UNSETTOSINGLEBO, but you'll "
192 + "still be using a peculiar connectivity "
193 + "table were "
194 + "many bonds are artificially marked as "
195 + "single to "
196 + "avoid use of 'UNSET' bond order. "
197 + "Further details on the problem: "
198 + e.getMessage());
199 return false;
200 } else {
201 settings.getLogger().log(Level.WARNING,"Failed "
202 + "kekulization "
203 + "for structure " + index
204 + " but UNSETTOSINGLEBO "
205 + "keyword used. Forcing use of single bonds to "
206 + "replace bonds with unset order.");
207 for (IBond bnd : mol.bonds())
208 {
209 if (bnd.getOrder().equals(IBond.Order.UNSET))
210 {
211 bnd.setOrder(IBond.Order.SINGLE);
212 }
213 }
214 }
215 }
216 }
217 return true;
218 }
219
220//------------------------------------------------------------------------------
221
233 public static void filterStrucutresBySMARTS(File input, Set<String> smarts,
234 File output, Logger logger)
235 throws DENOPTIMException, IOException
236 {
237 FileInputStream fis = new FileInputStream(input);
238 IteratingSDFReader reader = new IteratingSDFReader(fis,
239 DefaultChemObjectBuilder.getInstance());
240
241 int i = -1;
242 Map<String, String> smartsMap = new HashMap<String, String>();
243 for (String s : smarts)
244 {
245 i++;
246 smartsMap.put("prefilter-"+i, s);
247 }
248
249 int index = -1;
250 int maxBufferSize = 2000;
251 ArrayList<IAtomContainer> buffer = new ArrayList<IAtomContainer>(500);
252 try {
253 while (reader.hasNext())
254 {
255 index++;
256 if (logger!=null)
257 {
258 logger.log(Level.FINE,"Prefiltering structure " + index);
259 }
260 IAtomContainer mol = reader.next();
261
262 ManySMARTSQuery msq = new ManySMARTSQuery(mol, smartsMap);
263 if (msq.hasProblems())
264 {
265 String msg = "WARNING! Problems while searching for "
266 + "specific atoms/bonds using SMARTS: "
267 + msq.getMessage();
268 throw new DENOPTIMException(msg,msq.getProblem());
269 }
270 Map<String, Mappings> allMatches = msq.getAllMatches();
271
272 if (allMatches.size()==0)
273 {
274 buffer.add(mol);
275 } else {
276 String hits = "";
277 for (String s : allMatches.keySet())
278 hits = hits + DenoptimIO.NL + smartsMap.get(s);
279 if (logger!=null)
280 {
281 logger.log(Level.INFO,"Found match for " + hits
282 + "Rejecting structure " + index + ": "
283 + mol.getTitle());
284 }
285 }
286
287 // If max buffer size is reached, then bump to file
288 if (buffer.size() >= maxBufferSize)
289 {
290 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer,
291 true);
292 buffer.clear();
293 }
294 }
295 } finally {
296 reader.close();
297 }
298 if (buffer.size() < maxBufferSize)
299 {
300 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer, true);
301 buffer.clear();
302 }
303 }
304
305//------------------------------------------------------------------------------
306
322 public static boolean fragmentationFromGraphs(File input,
323 FragmenterParameters settings, File output, Logger logger)
324 throws DENOPTIMException, Exception
325 {
326 int totalProd = 0;
327 for (DGraph graph : DenoptimIO.readDENOPTIMGraphsFromFile(input))
328 {
329 List<Vertex> fragments = graph.getVertexList();
330
331 // Post-fragmentation processing of fragments
332 List<Vertex> keptFragments = new ArrayList<Vertex>();
333 int fragCounter = 0;
334 for (Vertex frag : fragments)
335 {
336 // Add metadata
337 fragCounter++;
338 manageFragmentCollection(frag, fragCounter, settings,
339 keptFragments, logger);
340 }
341 if (logger!=null)
342 {
343 logger.log(Level.FINE,"Fragments surviving post-"
344 + "processing: " + keptFragments.size());
345 }
346
347 if (keptFragments.size()>0)
348 {
349 totalProd += keptFragments.size();
351 keptFragments, true);
352 }
353 }
354 return totalProd>0;
355 }
356
357//------------------------------------------------------------------------------
358
366 public static List<Vertex> fragmentation(IAtomContainer mol,
367 FragmenterParameters settings)
368 throws DENOPTIMException
369 {
370 List<Vertex> fragments = new ArrayList<Vertex>();
371 if (settings.getFragmentationTmpls().size()>0)
372 {
373 fragments = fragmentation(mol, settings.getFragmentationTmpls(),
374 settings.getMaxBufferShellSize(),
375 settings.getRandomizer(), settings.getLogger());
376 } else {
377 fragments = fragmentation(mol, settings.getCuttingRules(),
378 settings.getLogger());
379 }
380 return fragments;
381 }
382
383//-----------------------------------------------------------------------------
384
404 public static boolean fragmentation(File input, FragmenterParameters settings,
405 File output, Logger logger) throws CDKException, IOException,
406 DENOPTIMException, IllegalArgumentException, UndetectedFileFormatException
407 {
410
411 int totalProd = 0;
412 int totalKept = 0;
413 int index = -1;
414 try {
415 while (iterator.hasNext())
416 {
417 index++;
418 if (logger!=null)
419 {
420 logger.log(Level.FINE,"Fragmenting structure " + index);
421 }
422 IAtomContainer mol = iterator.next();
423 String molName = "noname-mol" + index;
424 if (mol.getTitle()!=null && !mol.getTitle().isBlank())
425 molName = mol.getTitle();
426
427 // Generate the fragments
428 List<Vertex> fragments = fragmentation(mol, settings);
429 if (logger!=null)
430 {
431 logger.log(Level.FINE,"Fragmentation produced "
432 + fragments.size() + " fragments.");
433 }
434 totalProd += fragments.size();
435
436 // Post-fragmentation processing of fragments
437 List<Vertex> keptFragments = new ArrayList<Vertex>();
438 int fragCounter = 0;
439 for (Vertex frag : fragments)
440 {
441 // Add metadata
442 String fragIdStr = "From_" + molName + "_" + fragCounter;
443 frag.setProperty("cdk:Title", fragIdStr);
444 fragCounter++;
445 manageFragmentCollection(frag, fragCounter, settings,
446 keptFragments, logger);
447 }
448 if (logger!=null)
449 {
450 logger.log(Level.FINE,"Fragments surviving post-"
451 + "processing: " + keptFragments.size());
452 }
453 totalKept += keptFragments.size();
454 if (!settings.doManageIsomorphicFamilies() && totalKept>0)
455 {
457 keptFragments,true);
458 }
459 }
460 } finally {
461 iterator.close();
462 }
463
464 // Did we actually produce anything? We might not...
465 if (totalProd==0)
466 {
467 if (logger!=null)
468 {
469 logger.log(Level.WARNING,"No fragment produced. Cutting rules "
470 + "were ineffective on the given structures.");
471 }
472 return false;
473 } else if (totalKept==0)
474 {
475 if (logger!=null)
476 {
477 logger.log(Level.WARNING,"No fragment kept out of " + totalProd
478 + " produced fragments. Filtering criteria might be "
479 + "too restrictive.");
480 }
481 return false;
482 }
483 return true;
484 }
485
486//------------------------------------------------------------------------------
487
501 public static List<Vertex> fragmentation(IAtomContainer mol,
502 List<DGraph> templates, int maxBufferShellSize,
503 Randomizer randomizer, Logger logger)
504 throws DENOPTIMException
505 {
506 // The best set of fragments is the one with as many fragments that are isomorfic
507 // // with the template
508 List<Vertex> fragments = new ArrayList<Vertex>();
509 int maxNumIsomorfFrgs = -1;
510 for (DGraph templateGraph : templates)
511 {
512 // Check if the template is connected
513 if (!templateGraph.isConnected())
514 {
515 // This limitation derives from the fact that to speed up substructure
516 // search we reduce the template graph to the smallest set of atoms that
517 // identify the topology of APs. The
518 // can identify the matching topology. This is not possible for disconnected
519 // graphs.
520 throw new DENOPTIMException("Template graph is not connected. "
521 + "Cannot use it for fragmentation. Please check the "
522 + "template graph and make sure it is connected.");
523 }
524
525 ThreeDimTreeBuilder tb = new ThreeDimTreeBuilder(logger, randomizer);
526 IAtomContainer templateMol = tb.convertGraphTo3DAtomContainer(templateGraph,
527 true, true, true);
528
529 //TODO: consider splitting the template into connected components and
530 // reduce each component independently.
531
532 // Optimized creation of a reduced templateMol for faster isomorphism search
533 // Keep full templateMol for exploreDGraphForMappings
534 TopoTemplateProducer ttp = new TopoTemplateProducer(templateMol);
535
536 // The best set of fragments from this template may not be perfect, so we
537 // consider a score that consists of how many vertexes are isomorphic
538 // to the template graph vertexes.
539 List<Vertex> fragsFromThisTemplate = new ArrayList<>();
540 int maxNumIsomorfFrgsFromThisTmpl = -1;
541
542 // Define the maximum theoretical score considering that the template may
543 // be matched multiple times in the molecule
544 List<Vertex> templateVertexes = templateGraph.getVertexAnyLevel();
545 int maxTheoreticalScorePerMapping = 0;
546 for (int iva=0; iva<templateVertexes.size(); iva++)
547 {
548 Vertex va = templateVertexes.get(iva);
549 if (!(va instanceof Fragment))
550 {
551 continue;
552 }
553 for (int ivb=0; ivb<templateVertexes.size(); ivb++)
554 {
555 Vertex vb = templateVertexes.get(ivb);
556 if (!(vb instanceof Fragment))
557 {
558 continue;
559 }
560 if (iva == ivb)
561 {
562 // we know that the same
563 maxTheoreticalScorePerMapping++;
564 } else if (((Fragment) va).isIsomorphicTo(vb))
565 {
566 maxTheoreticalScorePerMapping++;
567 }
568 }
569 }
570
571 // Use the smallest possible template needed to identify the graph topology
572 // in the molecular structure.
573 for (int bufferShellSize = 0; bufferShellSize < maxBufferShellSize; bufferShellSize++)
574 {
575 // Use a reduced templateMol for isomorphism search (faster)
576 IAtomContainer reducedTemplateMol = ttp.getTemplateWithBufferShell(bufferShellSize);
577 List<Map<IAtom,IAtom>> reducedAtomMappings = MoleculeUtils.findUniqueAtomMappings(
578 reducedTemplateMol, mol, logger);
579
580 // Convert mappings from reducedTemplateMol:mol to templateMol:mol using stored indices
581 List<Map<IAtom,IAtom>> atomMappings = new ArrayList<>();
582 for (Map<IAtom,IAtom> reducedMapping : reducedAtomMappings)
583 {
584 Map<IAtom,IAtom> fullMapping = new HashMap<>();
585 for (Map.Entry<IAtom,IAtom> entry : reducedMapping.entrySet())
586 {
587 IAtom reducedAtom = entry.getKey();
588 IAtom molAtom = entry.getValue();
589
590 // Get original atom index from reduced atom property
591 Object indexObj = reducedAtom.getProperty("DENOPTIM_ORIGINAL_ATOM_INDEX");
592 if (indexObj != null)
593 {
594 int originalIndex = ((Number) indexObj).intValue();
595 IAtom originalAtom = templateMol.getAtom(originalIndex);
596 if (originalAtom != null)
597 {
598 fullMapping.put(originalAtom, molAtom);
599 }
600 }
601 }
602 if (!fullMapping.isEmpty())
603 {
604 atomMappings.add(fullMapping);
605 }
606 }
607
608 Fragment masterFrag = new Fragment(mol, BBType.UNDEFINED);
609 IAtomContainer masterFragIAC = masterFrag.getIAtomContainer();
610
611 // For each atom mapping, replace bonds corresponding to edges
612 // (at any embedding level) with attachment points and annotate embedding level.
613 // In case of symmetry, multiple mappings will effectivly cut the same bonds,
614 // and produce the same fragments of the template graph only for
615 // symmetry-redundant mappings.
616 for (Map<IAtom,IAtom> atomMapping : atomMappings)
617 {
618 try {
619 exploreDGraphForMappings(templateGraph, templateMol,
620 masterFrag, masterFragIAC, mol, atomMapping);
621 } catch (Throwable e) {
622 e.printStackTrace();
623 logger.log(Level.WARNING, "Error while exploring the template graph: " + e.getMessage());
624 continue;
625 }
626 }
627
628 // Extract isolated fragments
629 List<Vertex> locfragments = new ArrayList<Vertex>();
630 Set<Integer> doneAlready = new HashSet<Integer>();
631 for (int idx=0 ; idx<masterFrag.getAtomCount(); idx++)
632 {
633 if (doneAlready.contains(idx))
634 continue;
635
636 Fragment cloneOfMaster = masterFrag.clone();
637 IAtomContainer iac = cloneOfMaster.getIAtomContainer();
638 Set<IAtom> atmsToKeep = exploreConnectivity(iac.getAtom(idx), iac);
639 atmsToKeep.stream().forEach(atm -> doneAlready.add(iac.indexOf(atm)));
640
641 Set<IAtom> atmsToRemove = new HashSet<IAtom>();
642 for (IAtom atm : cloneOfMaster.atoms())
643 {
644 if (!atmsToKeep.contains(atm))
645 {
646 atmsToRemove.add(atm);
647 }
648 }
649 cloneOfMaster.removeAtoms(atmsToRemove);
650 if (cloneOfMaster.getAttachmentPoints().size()>0)
651 locfragments.add(cloneOfMaster);
652 }
653
654 int localScore = 0;
655 for (Vertex frag : locfragments)
656 {
657 if (!(frag instanceof Fragment))
658 {
659 continue;
660 }
661 for (Vertex templateVertex : templateGraph.getVertexList())
662 {
663 if (((Fragment) frag).isIsomorphicTo(templateVertex))
664 {
665 localScore++;
666 }
667 }
668 }
669
670 if (localScore > maxNumIsomorfFrgsFromThisTmpl)
671 {
672 maxNumIsomorfFrgsFromThisTmpl = localScore;
673 fragsFromThisTemplate = locfragments;
674 if (maxNumIsomorfFrgsFromThisTmpl ==
675 maxTheoreticalScorePerMapping*atomMappings.size())
676 {
677 break;
678 }
679 }
680 }
681
682 if (maxNumIsomorfFrgsFromThisTmpl > maxNumIsomorfFrgs)
683 {
684 maxNumIsomorfFrgs = maxNumIsomorfFrgsFromThisTmpl;
685 fragments = fragsFromThisTemplate;
686 }
687 }
688 return fragments;
689 }
690
691//------------------------------------------------------------------------------
692
701 private static void exploreDGraphForMappings(DGraph graph, IAtomContainer graphIAC,
702 Fragment masterFrag, IAtomContainer masterFragIAC, IAtomContainer mol,
703 Map<IAtom,IAtom> graphToMolMapping)
704 throws DENOPTIMException
705 {
706 int cutId = -1;
707 for (Edge edge : graph.getEdgeList())
708 {
709 cutId++;
710 int srcAPIdx = edge.getSrcAP().getAtomPositionNumberInMol();
711 int trgAPIdx = edge.getTrgAP().getAtomPositionNumberInMol();
712 IAtom graphAtmSrc = graphIAC.getAtom(srcAPIdx);
713 IAtom graphAtmTrg = graphIAC.getAtom(trgAPIdx);
714
715 IAtom masterFragAtmSrc = masterFragIAC.getAtom(mol.indexOf(graphToMolMapping.get(graphAtmSrc)));
716 IAtom masterFragAtmTrg = masterFragIAC.getAtom(mol.indexOf(graphToMolMapping.get(graphAtmTrg)));
717
718 IBond bnd = masterFragIAC.getBond(masterFragAtmSrc, masterFragAtmTrg);
719 if (bnd != null)
720 {
721 masterFragIAC.removeBond(bnd);
722 }
723
724 AttachmentPoint srcAP = masterFrag.addAPOnAtom(masterFragAtmSrc,
725 edge.getSrcAP().getAPClass(),
726 MoleculeUtils.getPoint3d(masterFragAtmTrg));
727 srcAP.setCutId(cutId);
728 AttachmentPoint trgAP = masterFrag.addAPOnAtom(masterFragAtmTrg,
729 edge.getTrgAP().getAPClass(),
730 MoleculeUtils.getPoint3d(masterFragAtmSrc));
731 trgAP.setCutId(cutId);
732 }
733
734 for (Vertex v : graph.getVertexList())
735 {
736 if (v instanceof Template)
737 {
738 DGraph innerGraph = ((Template) v).getInnerGraph();
739 exploreDGraphForMappings(innerGraph, graphIAC, masterFrag, masterFragIAC, mol, graphToMolMapping);
740 continue;
741 }
742
743 // Deal with vertexes without edges, i.e., free APs
744 for (AttachmentPoint ap : v.getAttachmentPoints())
745 {
746 if (!ap.isAvailable())
747 {
748 continue;
749 }
750 IAtom graphAtmSrc = graphIAC.getAtom(ap.getAtomPositionNumberInMol());
751 IAtom masterFragAtmSrc = masterFragIAC.getAtom(mol.indexOf(graphToMolMapping.get(graphAtmSrc)));
752
753 // find the position of the AP head in 3D space by aligning geoetry to template geometry
754 Point3d apHead = findPointAlignedWithTmpl(ap, graphAtmSrc,
755 graphIAC, masterFragIAC, mol, graphToMolMapping);
756
757 // Make the free AP on the master
758 masterFrag.addAPOnAtom(masterFragAtmSrc, ap.getAPClass(), apHead);
759 }
760 }
761 }
762
763//------------------------------------------------------------------------------
764
781 private static Point3d findPointAlignedWithTmpl(AttachmentPoint ap,
782 IAtom graphAtmSrc, IAtomContainer graphIAC, IAtomContainer masterFragIAC,
783 IAtomContainer mol,
784 Map<IAtom,IAtom> graphToMolMapping) throws DENOPTIMException
785 {
786 // Ensure assumptions
787 if (!graphIAC.contains(graphAtmSrc))
788 {
789 throw new DENOPTIMException("The atom " + graphAtmSrc.getSymbol() + " is not in the graphIAC.");
790 }
791 if (mol.getAtomCount() != masterFragIAC.getAtomCount())
792 {
793 throw new DENOPTIMException("The number of atoms in the molecule and the master fragment are not the same.");
794 }
795
796 // Work with cloned molecules
797 IAtomContainer molA = MoleculeUtils.makeSameAs(masterFragIAC);
798 IAtomContainer molB = MoleculeUtils.makeSameAs(graphIAC);
799
800 // Analogue of graphAtmSrc (belongs to graphIAC) on molA, which is a clone of masterFragIAC
801 // which is consistent with mol.
802 IAtom graphAtmSrcOnMolA = molA.getAtom(mol.indexOf(graphToMolMapping.get(graphAtmSrc)));
803
804 // Add the AP as a dummy atom to the molecule that will be rototranslated
805 IAtom dummyAtm = new Atom("He"); //element is irrelevant
806 dummyAtm.setPoint3d(ap.getDirectionVector());
807 molB.addAtom(dummyAtm);
808
809 List<IAtom> closestNeighborsOnGrphIAC = graphIAC.getConnectedAtomsList(graphAtmSrc);
810 List<IAtom> closestNeighborsOnMolA = molA.getConnectedAtomsList(graphAtmSrcOnMolA);
811 int minNumClosestNeighbors = Math.min(closestNeighborsOnGrphIAC.size(), closestNeighborsOnMolA.size());
812 if (minNumClosestNeighbors < 1)
813 {
814 // We do not have enough atoms to do an alignement: use only distance
815 // NB: the case of a biatomic fragment is handles below.
816 double dist = graphAtmSrc.getPoint3d().distance(ap.getDirectionVector());
817 Point3d apHead = new Point3d(
818 graphAtmSrcOnMolA.getPoint3d().x + dist,
819 graphAtmSrcOnMolA.getPoint3d().y,
820 graphAtmSrcOnMolA.getPoint3d().z);
821 return apHead;
822 } else if (minNumClosestNeighbors == 1) {
823 // For biatomic fragments
824 double dist = graphAtmSrc.getPoint3d().distance(ap.getDirectionVector());
825 double angle = MathUtils.angle(
826 closestNeighborsOnGrphIAC.get(0).getPoint3d(),
827 graphAtmSrc.getPoint3d(),
828 ap.getDirectionVector());
829 IAtom nbrAtmOnMolA = molA.getAtom(mol.indexOf(graphToMolMapping.get(closestNeighborsOnGrphIAC.get(0))));
830 Point3d pSrc = graphAtmSrcOnMolA.getPoint3d();
831 Point3d pNbr = nbrAtmOnMolA.getPoint3d();
832 Point3d pSrcTmpl = graphAtmSrc.getPoint3d();
833 Point3d pNbrTmpl = closestNeighborsOnGrphIAC.get(0).getPoint3d();
834 Point3d pApTmpl = ap.getDirectionVector();
835
836 Vector3d uMol = CartesianSpaceUtils.getVectorFromTo(pSrc, pNbr);
837 uMol.normalize();
838 Vector3d uTmpl = CartesianSpaceUtils.getVectorFromTo(pSrcTmpl, pNbrTmpl);
839 uTmpl.normalize();
840 Vector3d wTmpl = CartesianSpaceUtils.getVectorFromTo(pSrcTmpl, pApTmpl);
841 wTmpl.normalize();
842
843 double angleRad = Math.toRadians(angle);
844 Vector3d apDir = new Vector3d();
845 apDir.scale(Math.cos(angleRad), uMol);
846 double sinAngle = Math.sin(angleRad);
848 {
849 Vector3d vRefTmpl = new Vector3d(wTmpl);
850 vRefTmpl.scaleAdd(-uTmpl.dot(wTmpl), uTmpl, vRefTmpl);
851 vRefTmpl.normalize();
852 Vector3d rotAxis = new Vector3d();
853 rotAxis.cross(uTmpl, uMol);
854 double sinRot = rotAxis.length();
855 double cosRot = uTmpl.dot(uMol);
857 {
858 rotAxis.normalize();
859 double rotAng = Math.toDegrees(Math.atan2(sinRot, cosRot));
860 CartesianSpaceUtils.rotatedVectorWAxisAngle(vRefTmpl, rotAxis, rotAng);
861 }
862 else if (cosRot < 0.0)
863 {
864 Vector3d flipAxis = CartesianSpaceUtils.getNormalDirection(uMol);
865 CartesianSpaceUtils.rotatedVectorWAxisAngle(vRefTmpl, flipAxis, 180.0);
866 }
867 vRefTmpl.scale(sinAngle);
868 apDir.add(vRefTmpl);
869 }
870
871 Point3d apHead = new Point3d(
872 pSrc.x + dist * apDir.x,
873 pSrc.y + dist * apDir.y,
874 pSrc.z + dist * apDir.z);
875 return apHead;
876 } else {
877 // Try to alogn atoms around AP sourc to get the position of the AP head
878 Map<IAtom,IAtom> molToGraphMappingAroundAPSrc = new HashMap<>();
879 Set<IAtom> atomsAlreadyUsed = new HashSet<>();
880 for (Map.Entry<IAtom,IAtom> pair : graphToMolMapping.entrySet())
881 {
882 IAtom atmGraphIAC = pair.getKey();
883 if (graphIAC.getConnectedAtomsList(graphAtmSrc).contains(atmGraphIAC)
884 || atmGraphIAC == graphAtmSrc)
885 {
886 atomsAlreadyUsed.add(atmGraphIAC);
887 molToGraphMappingAroundAPSrc.put(
888 molA.getAtom(mol.indexOf(pair.getValue())),
889 molB.getAtom(graphIAC.indexOf(atmGraphIAC)));
890 }
891 }
892 // When we have few atoms we take also the second shell of neighbors
893 if (minNumClosestNeighbors<3)
894 {
895 for (Map.Entry<IAtom,IAtom> pair : graphToMolMapping.entrySet())
896 {
897 IAtom atmGraphIAC = pair.getKey();
898 if (closestNeighborsOnGrphIAC.contains(atmGraphIAC)
899 && !atomsAlreadyUsed.contains(atmGraphIAC))
900 {
901 atomsAlreadyUsed.add(atmGraphIAC);
902 molToGraphMappingAroundAPSrc.put(
903 molA.getAtom(mol.indexOf(pair.getValue())),
904 molB.getAtom(graphIAC.indexOf(atmGraphIAC)));
905 }
906 }
907 }
908
909 // Make mapping in a format suitable for KabschAlignment
910 IAtom[] lstA = new IAtom[molToGraphMappingAroundAPSrc.size()];
911 IAtom[] lstB = new IAtom[molToGraphMappingAroundAPSrc.size()];
912 int k = 0;
913 for (Map.Entry<IAtom,IAtom> pair : molToGraphMappingAroundAPSrc.entrySet())
914 {
915 lstA[k] = pair.getKey();
916 lstB[k] = pair.getValue();
917 k++;
918 }
919
920 //Calculate rotational matrix and align
921 KabschAlignment sa;
922 try
923 {
924 sa = new KabschAlignment(lstA, lstB);
925 sa.align();
926 } catch (Throwable t) {
927 throw new DENOPTIMException("KabschAlignment failed.", t);
928 }
929
930 //Rototranslation of molecule B (origin is in A's center of mass!!!)
931 sa.rotateAtomContainer(molB);
932
933 // Translate B to to actual coordinates of A (instead of c.o.m.)
934 Point3d cm = sa.getCenterOfMass();
935 for (int ib = 0; ib < molB.getAtomCount(); ib++)
936 {
937 IAtom a = molB.getAtom(ib);
938 Point3d oldPlace = MoleculeUtils.getPoint3d(a);
939 Point3d newPlace = new Point3d(oldPlace.x + cm.x,
940 oldPlace.y + cm.y,
941 oldPlace.z + cm.z);
942 a.setPoint3d(newPlace);
943 }
944 }
945 return MoleculeUtils.getPoint3d(dummyAtm);
946 }
947
948//------------------------------------------------------------------------------
949
958 public static List<Vertex> fragmentation(IAtomContainer mol,
959 List<CuttingRule> rules, Logger logger) throws DENOPTIMException
960 {
961 Fragment masterFrag = new Fragment(mol,BBType.UNDEFINED);
962 IAtomContainer fragsMol = masterFrag.getIAtomContainer();
963
964 // Identify bonds
965 Map<String, List<MatchedBond>> matchingbonds =
966 FragmenterTools.getMatchingBondsAllInOne(fragsMol,rules,logger);
967
968 // Select bonds to cut and what rule to use for cutting them
969 int cutId = -1;
970 for (CuttingRule rule : rules) // NB: iterator follows rule's priority
971 {
972 String ruleName = rule.getName();
973
974 // Skip unmatched rules
975 if (!matchingbonds.keySet().contains(ruleName))
976 continue;
977
978 for (MatchedBond tb: matchingbonds.get(ruleName))
979 {
980 IAtom atmA = tb.getAtmSubClass0();
981 IAtom atmB = tb.getAtmSubClass1();
982
983 //ignore if bond already broken
984 if (!fragsMol.getConnectedAtomsList(atmA).contains(atmB))
985 {
986 continue;
987 }
988
989 //treatment of n-hapto ligands
990 if (rule.isHAPTO())
991 {
992 // Get central atom (i.e., the "mono-hapto" side,
993 // typically the metal)
994 // As a convention the central atom has subclass '0'
995 IAtom centralAtm = atmA;
996
997 // Get list of candidates for hapto-system:
998 // they have same cutting Rule and central metal
999 ArrayList<IAtom> candidatesForHapto = new ArrayList<IAtom>();
1000 for (MatchedBond tbForHapto : matchingbonds.get(ruleName))
1001 {
1002 //Consider only bond involving same central atom
1003 if (tbForHapto.getAtmSubClass0() == centralAtm)
1004 candidatesForHapto.add(tbForHapto.getAtmSubClass1());
1005 }
1006
1007 // Select atoms in n-hapto system: contiguous neighbors with
1008 // same type of bond with the same central atom.
1009 Set<IAtom> atmsInHapto = new HashSet<IAtom>();
1010 atmsInHapto.add(tb.getAtmSubClass1());
1011 atmsInHapto = exploreHapticity(tb.getAtmSubClass1(),
1012 centralAtm, candidatesForHapto, fragsMol);
1013 if (atmsInHapto.size() == 1)
1014 {
1015 logger.log(Level.WARNING,"Unable to find more than one "
1016 + "bond involved in high-hapticity ligand! "
1017 + "Bond ignored.");
1018 continue;
1019 }
1020
1021 // Check existence of all bonds involved in multi-hapto system
1022 boolean isSystemIntact = true;
1023 for (IAtom ligAtm : atmsInHapto)
1024 {
1025 List<IAtom> nbrsOfLigAtm =
1026 fragsMol.getConnectedAtomsList(ligAtm);
1027 if (!nbrsOfLigAtm.contains(centralAtm))
1028 {
1029 isSystemIntact = false;
1030 break;
1031 }
1032 }
1033
1034 // If not, it means that another rule already acted on the
1035 // system thus kill this attempt without generating du-atom
1036 if (!isSystemIntact)
1037 continue;
1038
1039 // A dummy atom will be used to define attachment point of
1040 // ligand with high hapticity
1041 Point3d dummyP3d = new Point3d(); //Used also for 2D
1042 for (IAtom ligAtm : atmsInHapto)
1043 {
1044 Point3d ligP3d = MoleculeUtils.getPoint3d(ligAtm);
1045 dummyP3d.x = dummyP3d.x + ligP3d.x;
1046 dummyP3d.y = dummyP3d.y + ligP3d.y;
1047 dummyP3d.z = dummyP3d.z + ligP3d.z;
1048 }
1049
1050 dummyP3d.x = dummyP3d.x / (double) atmsInHapto.size();
1051 dummyP3d.y = dummyP3d.y / (double) atmsInHapto.size();
1052 dummyP3d.z = dummyP3d.z / (double) atmsInHapto.size();
1053
1054 //Add Dummy atom to molecular object
1055 //if no other Du is already in the same position
1056 IAtom dummyAtm = null;
1057 for (IAtom oldDu : fragsMol.atoms())
1058 {
1061 {
1062 Point3d oldDuP3d = oldDu.getPoint3d();
1063 if (oldDuP3d.distance(dummyP3d) < 0.002)
1064 {
1065 dummyAtm = oldDu;
1066 break;
1067 }
1068 }
1069 }
1070
1071 if (dummyAtm==null)
1072 {
1073 dummyAtm = new PseudoAtom(DENOPTIMConstants.DUMMYATMSYMBOL);
1074 dummyAtm.setPoint3d(dummyP3d);
1075 fragsMol.addAtom(dummyAtm);
1076 }
1077
1078 // Modify connectivity of atoms involved in high-hapticity
1079 // coordination creation of Du-to-ATM bonds
1080 // By internal convention the bond order is "SINGLE".
1081 IBond.Order border = IBond.Order.valueOf("SINGLE");
1082
1083 for (IAtom ligAtm : atmsInHapto)
1084 {
1085 List<IAtom> nbrsOfDu = fragsMol.getConnectedAtomsList(
1086 dummyAtm);
1087 if (!nbrsOfDu.contains(ligAtm))
1088 {
1089 // Add bond with dummy
1090 Bond bnd = new Bond(dummyAtm,ligAtm,border);
1091 fragsMol.addBond(bnd);
1092 }
1093 // Remove bonds between central and coordinating atoms
1094 IBond oldBnd = fragsMol.getBond(centralAtm,ligAtm);
1095 fragsMol.removeBond(oldBnd);
1096 }
1097
1098 // NB: by convention the "first" class (i.e., the ???:0 class)
1099 // is always on the central atom.
1100 AttachmentPoint apA = masterFrag.addAPOnAtom(centralAtm,
1101 rule.getAPClass0(),
1102 MoleculeUtils.getPoint3d(dummyAtm));
1103 AttachmentPoint apB = masterFrag.addAPOnAtom(dummyAtm,
1104 rule.getAPClass1(),
1105 MoleculeUtils.getPoint3d(centralAtm));
1106
1107 cutId++;
1108 apA.setCutId(cutId);
1109 apB.setCutId(cutId);
1110 } else {
1111 //treatment of mono-hapto ligands
1112 IBond bnd = fragsMol.getBond(atmA,atmB);
1113 fragsMol.removeBond(bnd);
1114
1115 AttachmentPoint apA = masterFrag.addAPOnAtom(atmA,
1116 rule.getAPClass0(),
1118 AttachmentPoint apB = masterFrag.addAPOnAtom(atmB,
1119 rule.getAPClass1(),
1121
1122 cutId++;
1123 apA.setCutId(cutId);
1124 apB.setCutId(cutId);
1125 } //end of if (hapticity>1)
1126 } //end of loop over matching bonds
1127 } //end of loop over rules
1128
1129 // Extract isolated fragments
1130 List<Vertex> fragments = new ArrayList<Vertex>();
1131 Set<Integer> doneAlready = new HashSet<Integer>();
1132 for (int idx=0 ; idx<masterFrag.getAtomCount(); idx++)
1133 {
1134 if (doneAlready.contains(idx))
1135 continue;
1136
1137 Fragment cloneOfMaster = masterFrag.clone();
1138 IAtomContainer iac = cloneOfMaster.getIAtomContainer();
1139 Set<IAtom> atmsToKeep = exploreConnectivity(iac.getAtom(idx), iac);
1140 atmsToKeep.stream().forEach(atm -> doneAlready.add(iac.indexOf(atm)));
1141
1142 Set<IAtom> atmsToRemove = new HashSet<IAtom>();
1143 for (IAtom atm : cloneOfMaster.atoms())
1144 {
1145 if (!atmsToKeep.contains(atm))
1146 {
1147 atmsToRemove.add(atm);
1148 }
1149 }
1150 cloneOfMaster.removeAtoms(atmsToRemove);
1151 if (cloneOfMaster.getAttachmentPoints().size()>0)
1152 fragments.add(cloneOfMaster);
1153 }
1154
1155 return fragments;
1156 }
1157
1158//------------------------------------------------------------------------------
1172 static Set<IAtom> exploreHapticity(IAtom seed, IAtom centralAtom,
1173 ArrayList<IAtom> candidates, IAtomContainer mol)
1174 {
1175 Set<IAtom> atmsInHapto = new HashSet<IAtom>();
1176 atmsInHapto.add(seed);
1177 ArrayList<IAtom> toVisitAtoms = new ArrayList<IAtom>();
1178 toVisitAtoms.add(seed);
1179 ArrayList<IAtom> visitedAtoms = new ArrayList<IAtom>();
1180 while (toVisitAtoms.size()>0)
1181 {
1182 ArrayList<IAtom> toVisitLater = new ArrayList<IAtom>();
1183 for (IAtom atomInFocus : toVisitAtoms)
1184 {
1185 if (visitedAtoms.contains(atomInFocus)
1186 || atomInFocus==centralAtom)
1187 continue;
1188 else
1189 visitedAtoms.add(atomInFocus);
1190
1191 if (candidates.contains(atomInFocus))
1192 {
1193 atmsInHapto.add(atomInFocus);
1194 toVisitLater.addAll(mol.getConnectedAtomsList(atomInFocus));
1195 }
1196 }
1197 toVisitAtoms.clear();
1198 toVisitAtoms.addAll(toVisitLater);
1199 }
1200 return atmsInHapto;
1201 }
1202
1203//------------------------------------------------------------------------------
1212 static Set<IAtom> exploreConnectivity(IAtom seed, IAtomContainer mol)
1213 {
1214 Set<IAtom> atmsReachableFromSeed = new HashSet<IAtom>();
1215 ArrayList<IAtom> toVisitAtoms = new ArrayList<IAtom>();
1216 toVisitAtoms.add(seed);
1217 ArrayList<IAtom> visitedAtoms = new ArrayList<IAtom>();
1218 while (toVisitAtoms.size()>0)
1219 {
1220 ArrayList<IAtom> toVisitLater = new ArrayList<IAtom>();
1221 for (IAtom atomInFocus : toVisitAtoms)
1222 {
1223 if (visitedAtoms.contains(atomInFocus))
1224 continue;
1225 else
1226 visitedAtoms.add(atomInFocus);
1227
1228 atmsReachableFromSeed.add(atomInFocus);
1229 toVisitLater.addAll(mol.getConnectedAtomsList(atomInFocus));
1230 }
1231 toVisitAtoms.clear();
1232 toVisitAtoms.addAll(toVisitLater);
1233 }
1234 return atmsReachableFromSeed;
1235 }
1236
1237//-----------------------------------------------------------------------------
1238
1247 static Map<String, List<MatchedBond>> getMatchingBondsAllInOne(
1248 IAtomContainer mol, List<CuttingRule> rules, Logger logger)
1249 {
1250 // Collect all SMARTS queries
1251 Map<String,String> smarts = new HashMap<String,String>();
1252 for (CuttingRule rule : rules)
1253 {
1254 smarts.put(rule.getName(),rule.getWholeSMARTSRule());
1255 }
1256
1257 // Prepare a data structure for the return value
1258 Map<String, List<MatchedBond>> bondsMatchingRules =
1259 new HashMap<String, List<MatchedBond>>();
1260
1261 // Get all the matches to the SMARTS queries
1262 ManySMARTSQuery msq = new ManySMARTSQuery(mol, smarts);
1263 if (msq.hasProblems())
1264 {
1265 if (logger!=null)
1266 {
1267 logger.log(Level.WARNING, "Problem matching SMARTS: "
1268 + msq.getMessage());
1269 }
1270 return bondsMatchingRules;
1271 }
1272
1273 for (CuttingRule rule : rules)
1274 {
1275 String ruleName = rule.getName();
1276
1277 if (msq.getNumMatchesOfQuery(ruleName) == 0)
1278 {
1279 continue;
1280 }
1281
1282 // Get atoms matching cutting rule queries
1283 Mappings purgedPairs = msq.getMatchesOfSMARTS(ruleName);
1284
1285 // Evaluate subclass membership and eventually store target bonds
1286 ArrayList<MatchedBond> bondsMatched = new ArrayList<MatchedBond>();
1287 for (int[] pair : purgedPairs)
1288 {
1289 if (pair.length!=2)
1290 {
1291 throw new Error("Cutting rule: " + ruleName
1292 + " has identified " + pair.length + " atoms "
1293 + "instead of 2. Modify rule to make it find a "
1294 + "pair of atoms.");
1295 }
1296 MatchedBond tb = new MatchedBond(mol.getAtom(pair[0]),
1297 mol.getAtom(pair[1]), rule);
1298
1299 // Apply any further option of the cutting rule
1300 if (tb.satisfiesRuleOptions(logger))
1301 bondsMatched.add(tb);
1302 }
1303
1304 if (!bondsMatched.isEmpty())
1305 bondsMatchingRules.put(ruleName, bondsMatched);
1306 }
1307
1308 return bondsMatchingRules;
1309 }
1310
1311//------------------------------------------------------------------------------
1312
1318 public static void manageFragmentCollection(File input,
1319 FragmenterParameters settings,
1320 File output, Logger logger) throws DENOPTIMException, IOException,
1321 IllegalArgumentException, UndetectedFileFormatException
1322 {
1323 FileInputStream fis = new FileInputStream(input);
1324 IteratingSDFReader reader = new IteratingSDFReader(fis,
1325 DefaultChemObjectBuilder.getInstance());
1326
1327 int index = -1;
1328 int maxBufferSize = 2000;
1329 ArrayList<Vertex> buffer = new ArrayList<Vertex>(500);
1330 try {
1331 while (reader.hasNext())
1332 {
1333 index++;
1334 if (logger!=null)
1335 {
1336 logger.log(Level.FINE,"Processing fragment " + index);
1337 }
1338 Vertex frag = new Fragment(reader.next(), BBType.UNDEFINED);
1339 manageFragmentCollection(frag, index, settings,
1340 buffer, logger);
1341
1342 // If max buffer size is reached, then bump to file
1343 if (buffer.size() >= maxBufferSize)
1344 {
1346 buffer, true);
1347 buffer.clear();
1348 }
1349 }
1350 } finally {
1351 reader.close();
1352 }
1353 if (buffer.size() < maxBufferSize)
1354 {
1356 buffer, true);
1357 buffer.clear();
1358 }
1359 }
1360
1361//------------------------------------------------------------------------------
1362
1380 public static void manageFragmentCollection(Vertex frag, int fragCounter,
1381 FragmenterParameters settings,
1382 List<Vertex> collector, Logger logger)
1383 throws DENOPTIMException, IllegalArgumentException,
1385 {
1386
1387 if (!filterFragment((Fragment) frag, settings, logger))
1388 {
1389 return;
1390 }
1391
1392 //Compare with list of fragments to ignore
1393 if (settings.getIgnorableFragments().size() > 0)
1394 {
1395 if (settings.getIgnorableFragments().stream()
1396 .anyMatch(ignorable -> ((Fragment)frag)
1397 .isIsomorphicTo(ignorable)))
1398 {
1399 if (logger!=null)
1400 {
1401 logger.log(Level.FINE,"Fragment " + fragCounter
1402 + " is ignorable.");
1403 }
1404 return;
1405 }
1406 }
1407
1408 //Compare with list of fragments to retain
1409 if (settings.getTargetFragments().size() > 0)
1410 {
1411 if (!settings.getTargetFragments().stream()
1412 .anyMatch(ignorable -> ((Fragment)frag)
1413 .isIsomorphicTo(ignorable)))
1414 {
1415 if (logger!=null)
1416 {
1417 logger.log(Level.FINE,"Fragment " + fragCounter
1418 + " doesn't match any target: rejected.");
1419 }
1420 return;
1421 }
1422 }
1423
1424 // Add dummy atoms on linearities
1426 && settings.doAddDuOnLinearity())
1427 {
1429 settings.getLinearAngleLimit());
1430 }
1431
1432 // Management of duplicate fragments:
1433 // -> identify duplicates (isomorphic fragments),
1434 // -> keep one (or more, if we want to sample the isomorphs),
1435 // -> reject the rest.
1436 if (settings.doManageIsomorphicFamilies())
1437 {
1438 synchronized (settings.MANAGEMWSLOTSSLOCK)
1439 {
1440 String mwSlotID = getMWSlotIdentifier(frag,
1441 settings.getMWSlotSize());
1442
1443 File mwFileUnq = settings.getMWSlotFileNameUnqFrags(
1444 mwSlotID);
1445 File mwFileAll = settings.getMWSlotFileNameAllFrags(
1446 mwSlotID);
1447
1448 // Compare this fragment with previously seen ones
1449 Vertex unqVersion = null;
1450 if (mwFileUnq.exists())
1451 {
1452 ArrayList<Vertex> knownFrags =
1454 unqVersion = knownFrags.stream()
1455 .filter(knownFrag ->
1456 ((Fragment)frag).isIsomorphicTo(knownFrag))
1457 .findAny()
1458 .orElse(null);
1459 }
1460 if (unqVersion!=null)
1461 {
1462 // Identify this unique fragment
1463 String isoFamID = unqVersion.getProperty(
1465 .toString();
1466
1467 // Do we already have enough isomorphic family members
1468 // for this fragment?
1469 int sampleSize = settings.getIsomorphsCount()
1470 .get(isoFamID);
1471 if (sampleSize < settings.getIsomorphicSampleSize())
1472 {
1473 // Add this isomorphic version to the sample
1474 frag.setProperty(
1476 isoFamID);
1477 settings.getIsomorphsCount().put(isoFamID,
1478 sampleSize+1);
1479 DenoptimIO.writeVertexToFile(mwFileAll,
1480 FileFormat.VRTXSDF, frag, true);
1481 collector.add(frag);
1482 } else {
1483 // This would be inefficient in the long run
1484 // because it by-passes the splitting by MW.
1485 // Do not do it!
1486 /*
1487 if (logger!=null)
1488 {
1489 logger.log(Level.FINE,"Fragment "
1490 + fragCounter
1491 + " is isomorphic to unique fragment "
1492 + unqVersionID + ", but we already "
1493 + "have a sample of " + sampleSize
1494 + ": ignoring this fragment from now "
1495 + "on.");
1496 }
1497 settings.getIgnorableFragments().add(frag);
1498 */
1499 }
1500 } else {
1501 // This is a never-seen fragment
1502 String isoFamID = settings.newIsomorphicFamilyID();
1503 frag.setProperty(
1505 isoFamID);
1506 settings.getIsomorphsCount().put(isoFamID, 1);
1507 DenoptimIO.writeVertexToFile(mwFileUnq,
1508 FileFormat.VRTXSDF, frag, true);
1509 DenoptimIO.writeVertexToFile(mwFileAll,
1510 FileFormat.VRTXSDF, frag, true);
1511 collector.add(frag);
1512 }
1513 } // end synchronized block
1514 } else {
1515 //If we are here, we did not ask to remove duplicates
1516 collector.add(frag);
1517 }
1518 }
1519
1520//------------------------------------------------------------------------------
1521
1531 public static boolean filterFragment(Fragment frag,
1532 FragmenterParameters settings)
1533 {
1534 return filterFragment(frag, settings, settings.getLogger());
1535 }
1536
1537//------------------------------------------------------------------------------
1538
1550 public static boolean filterFragment(Fragment frag,
1551 FragmenterParameters settings, Logger logger)
1552 {
1553 // Default filtering criteria: get ring of R/*/X/Xx
1554 for (IAtom atm : frag.atoms())
1555 {
1556 if (MoleculeUtils.isElement(atm))
1557 {
1558 continue;
1559 }
1560 String smb = MoleculeUtils.getSymbolOrLabel(atm);
1561 if (DENOPTIMConstants.DUMMYATMSYMBOL.equals(smb))
1562 {
1563 continue;
1564 }
1565 logger.log(Level.FINE,"Removing fragment contains non-element '"
1566 + smb + "'");
1567 return false;
1568 }
1569
1570 if (settings.isWorkingIn3D())
1571 {
1572 // Incomplete 3D fragmentation: an atom has the same coords of an AP.
1573 for (AttachmentPoint ap : frag.getAttachmentPoints())
1574 {
1575 Point3d ap3d = ap.getDirectionVector();
1576 if (ap3d!=null)
1577 {
1578 for (IAtom atm : frag.atoms())
1579 {
1580 Point3d atm3d = MoleculeUtils.getPoint3d(atm);
1581 double dist = ap3d.distance(atm3d);
1582 if (dist < 0.0002)
1583 {
1584 logger.log(Level.FINE,"Removing fragment with AP"
1585 + frag.getIAtomContainer().indexOf(atm)
1586 + " and atom " + MoleculeUtils.getSymbolOrLabel(atm)
1587 + " coincide.");
1588 return false;
1589 }
1590 }
1591 }
1592 }
1593 }
1594 if (settings.doRejectWeirdIsotopes())
1595 {
1596 for (IAtom atm : frag.atoms())
1597 {
1598 if (MoleculeUtils.isElement(atm))
1599 {
1600 // Unconfigured isotope has null mass number
1601 if (atm.getMassNumber() == null)
1602 continue;
1603
1604 String symb = MoleculeUtils.getSymbolOrLabel(atm);
1605 int a = atm.getMassNumber();
1606 try {
1607 IIsotope major = Isotopes.getInstance().getMajorIsotope(symb);
1608 if (a != major.getMassNumber())
1609 {
1610 logger.log(Level.FINE,"Removing fragment containing "
1611 + "isotope "+symb+a+".");
1612 return false;
1613 }
1614 } catch (Throwable t) {
1615 logger.log(Level.WARNING,"Not able to perform Isotope"
1616 + "detection.");
1617 }
1618 }
1619
1620 }
1621 }
1622
1623 // User-controlled filtering criteria
1624
1625 if (settings.getRejectedElements().size() > 0)
1626 {
1627 for (IAtom atm : frag.atoms())
1628 {
1629 String symb = MoleculeUtils.getSymbolOrLabel(atm);
1630 if (settings.getRejectedElements().contains(symb))
1631 {
1632 logger.log(Level.FINE,"Removing fragment containing '"
1633 + symb + "'.");
1634 return false;
1635 }
1636 }
1637 }
1638
1639 if (settings.getRejectedFormulaLessThan().size() > 0
1640 || settings.getRejectedFormulaMoreThan().size() > 0)
1641 {
1642 Map<String,Double> eaMol = FormulaUtils.getElementalanalysis(
1643 frag.getIAtomContainer());
1644
1645 for (Map<String,Double> criterion :
1646 settings.getRejectedFormulaMoreThan())
1647 {
1648 for (String el : criterion.keySet())
1649 {
1650 if (eaMol.containsKey(el))
1651 {
1652 // -0.5 to make it strictly less-than
1653 if (eaMol.get(el) - criterion.get(el) > 0.5)
1654 {
1655 logger.log(Level.FINE,"Removing fragment that "
1656 + "contains too much '" + el + "' "
1657 + "as requested by formula"
1658 + "-based (more-than) settings (" + el
1659 + eaMol.get(el) + " > " + criterion + ").");
1660 return false;
1661 }
1662 }
1663 }
1664 }
1665
1666 Map<String,Double> criterion = settings.getRejectedFormulaLessThan();
1667 for (String el : criterion.keySet())
1668 {
1669 if (!eaMol.containsKey(el))
1670 {
1671 logger.log(Level.FINE,"Removing fragment that does not "
1672 + "contain '" + el + "' as requested by formula"
1673 + "-based (less-than) settings.");
1674 return false;
1675 } else {
1676 // 0.5 to make it strictly more-than
1677 if (eaMol.get(el) - criterion.get(el) < -0.5)
1678 {
1679 logger.log(Level.FINE,"Removing fragment that "
1680 + "contains too little '" + el + "' "
1681 + "as requested by formula"
1682 + "-based settings (" + el
1683 + eaMol.get(el) + " < " + criterion + ").");
1684 return false;
1685 }
1686 }
1687 }
1688
1689 }
1690
1691 if (settings.getRejectedAPClasses().size() > 0)
1692 {
1693 for (APClass apc : frag.getAllAPClasses())
1694 {
1695 for (String s : settings.getRejectedAPClasses())
1696 {
1697 if (apc.toString().startsWith(s))
1698 {
1699 logger.log(Level.FINE,"Removing fragment with APClass "
1700 + apc);
1701 return false;
1702 }
1703 }
1704 }
1705 }
1706
1707 if (settings.getRejectedAPClassCombinations().size() > 0)
1708 {
1709 loopOverCombinations:
1710 for (String[] conditions : settings.getRejectedAPClassCombinations())
1711 {
1712 for (int ip=0; ip<conditions.length; ip++)
1713 {
1714 String condition = conditions[ip];
1715 boolean found = false;
1716 for (APClass apc : frag.getAllAPClasses())
1717 {
1718 if (apc.toString().startsWith(condition))
1719 {
1720 found = true;
1721 continue;
1722 }
1723 }
1724 if (!found)
1725 continue loopOverCombinations;
1726 // Here we do have at least one AP satisfying the condition.
1727 }
1728 // Here we manage or satisfy all conditions. Therefore, we can
1729 // reject this fragment
1730
1731 String allCondsAsString = "";
1732 for (int i=0; i<conditions.length; i++)
1733 allCondsAsString = allCondsAsString + " " + conditions[i];
1734
1735 logger.log(Level.FINE,"Removing fragment with combination of "
1736 + "APClasses matching '" + allCondsAsString + "'.");
1737 return false;
1738 }
1739 }
1740
1741 if (settings.getMaxFragHeavyAtomCount()>0
1742 || settings.getMinFragHeavyAtomCount()>0)
1743 {
1744 int totHeavyAtm = 0;
1745 for (IAtom atm : frag.atoms())
1746 {
1747 if (MoleculeUtils.isElement(atm))
1748 {
1749 String symb = MoleculeUtils.getSymbolOrLabel(atm);
1750 if ((!symb.equals("H")) && (!symb.equals(
1752 totHeavyAtm++;
1753 }
1754 }
1755 if (settings.getMaxFragHeavyAtomCount() > 0
1756 && totHeavyAtm > settings.getMaxFragHeavyAtomCount())
1757 {
1758 logger.log(Level.FINE,"Removing fragment with too many atoms ("
1759 + totHeavyAtm + " < "
1760 + settings.getMaxFragHeavyAtomCount()
1761 + ")");
1762 return false;
1763 }
1764 if (settings.getMinFragHeavyAtomCount() > 0
1765 && totHeavyAtm < settings.getMinFragHeavyAtomCount())
1766 {
1767 logger.log(Level.FINE,"Removing fragment with too few atoms ("
1768 + totHeavyAtm + " < "
1769 + settings.getMinFragHeavyAtomCount()
1770 + ")");
1771 return false;
1772 }
1773 }
1774
1775 if (settings.getFragRejectionSMARTS().size() > 0)
1776 {
1778 settings.getFragRejectionSMARTS());
1779 if (msq.hasProblems())
1780 {
1781 logger.log(Level.WARNING,"Problems evaluating SMARTS-based "
1782 + "rejection criteria. " + msq.getMessage());
1783 }
1784
1785 for (String criterion : settings.getFragRejectionSMARTS().keySet())
1786 {
1787 if (msq.getNumMatchesOfQuery(criterion)>0)
1788 {
1789 logger.log(Level.FINE,"Removing fragment that matches "
1790 + "SMARTS-based rejection criteria '" + criterion
1791 + "'.");
1792 return false;
1793 }
1794 }
1795 }
1796
1797 if (settings.getFragRetentionSMARTS().size() > 0)
1798 {
1800 settings.getFragRetentionSMARTS());
1801 if (msq.hasProblems())
1802 {
1803 logger.log(Level.WARNING,"Problems evaluating SMARTS-based "
1804 + "rejection criteria. " + msq.getMessage());
1805 }
1806
1807 boolean matchesAny = false;
1808 for (String criterion : settings.getFragRetentionSMARTS().keySet())
1809 {
1810 if (msq.getNumMatchesOfQuery(criterion) > 0)
1811 {
1812 matchesAny = true;
1813 break;
1814 }
1815 }
1816 if (!matchesAny)
1817 {
1818 logger.log(Level.FINE,"Removing fragment that does not "
1819 + "match any SMARTS-based retention criteria.");
1820 return false;
1821 }
1822 }
1823 return true;
1824 }
1825
1826//------------------------------------------------------------------------------
1827
1835 public static String getMWSlotIdentifier(Vertex frag, int slotSize)
1836 {
1837 for (IAtom a : frag.getIAtomContainer().atoms())
1838 {
1839 if (a.getImplicitHydrogenCount()==null)
1840 a.setImplicitHydrogenCount(0);
1841 }
1842 double mw = AtomContainerManipulator.getMass(frag.getIAtomContainer());
1843 int slotNum = (int) (mw / (Double.valueOf(slotSize)));
1844 return slotNum*slotSize + "-" + (slotNum+1)*slotSize;
1845 }
1846
1847//------------------------------------------------------------------------------
1848
1849 public static Vertex getRCVForAP(AttachmentPoint ap, APClass rcvApClass)
1850 throws DENOPTIMException
1851 {
1852 IAtomContainer mol = SilentChemObjectBuilder.getInstance()
1853 .newAtomContainer();
1854 Point3d apv = ap.getDirectionVector();
1855 mol.addAtom(new PseudoAtom(RingClosingAttractor.RCALABELPERAPCLASS.get(
1856 rcvApClass),
1857 new Point3d(
1858 Double.valueOf(apv.x),
1859 Double.valueOf(apv.y),
1860 Double.valueOf(apv.z))));
1861
1862 Fragment rcv = new Fragment(mol, BBType.FRAGMENT);
1863 rcv.setAsRCV(true);
1864
1865 Point3d aps = MoleculeUtils.getPoint3d(
1866 ap.getOwner().getIAtomContainer().getAtom(
1867 ap.getAtomPositionNumber()));
1868 rcv.addAP(0, rcvApClass, new Point3d(
1869 Double.valueOf(aps.x),
1870 Double.valueOf(aps.y),
1871 Double.valueOf(aps.z)));
1872 return rcv;
1873 }
1874
1875//------------------------------------------------------------------------------
1876
1877}
General set of constants used in DENOPTIM.
static final Object FORMULASTR
Property name used to store molecular formula as string in an atom container.
static final double FLOATCOMPARISONTOLERANCE
Smallest difference for comparison of double and float numbers.
static final String DUMMYATMSYMBOL
Symbol of dummy atom.
static final Object ISOMORPHICFAMILYID
Property used to store the identifier of the family of isomorphic fragments that owns a fragment.
Exception thrown when the format of a file is not recognized.
static void exploreDGraphForMappings(DGraph graph, IAtomContainer graphIAC, Fragment masterFrag, IAtomContainer masterFragIAC, IAtomContainer mol, Map< IAtom, IAtom > graphToMolMapping)
Recursive function to process edges at any level of embedding to remove the corresponding bonds and c...
static Map< String, List< MatchedBond > > getMatchingBondsAllInOne(IAtomContainer mol, List< CuttingRule > rules, Logger logger)
Identification of the bonds matching a list of SMARTS queries.
static void checkElementalAnalysisAgainstFormula(File input, File output, Logger logger)
Processes all molecules analyzing the composition of the structure in the chemical representation as ...
static List< Vertex > fragmentation(IAtomContainer mol, List< CuttingRule > rules, Logger logger)
Chops one chemical structure by applying the given cutting rules.
static List< Vertex > fragmentation(IAtomContainer mol, List< DGraph > templates, int maxBufferShellSize, Randomizer randomizer, Logger logger)
Chops one chemical structure by applying the given fragmentation templates.
static boolean filterFragment(Fragment frag, FragmenterParameters settings)
Filter fragments according to the criteria defined in the settings.
static Set< IAtom > exploreConnectivity(IAtom seed, IAtomContainer mol)
Explores the connectivity annotating which atoms have been visited.
static boolean prepareMolToFragmentation(IAtomContainer mol, FragmenterParameters settings, int index)
Do any pre-processing on a IAtomContainer meant to be fragmented.
static void manageFragmentCollection(Vertex frag, int fragCounter, FragmenterParameters settings, List< Vertex > collector, Logger logger)
Management of fragments: includes application of fragment filters, rejection rules,...
static Set< IAtom > exploreHapticity(IAtom seed, IAtom centralAtom, ArrayList< IAtom > candidates, IAtomContainer mol)
Identifies non-central atoms involved in the same n-hapto ligand as the seed atom.
static List< Vertex > fragmentation(IAtomContainer mol, FragmenterParameters settings)
Performs fragmentation according to the given settings.
static boolean filterFragment(Fragment frag, FragmenterParameters settings, Logger logger)
Filter fragments according to the criteria defined in the settings.
static void filterStrucutresBySMARTS(File input, Set< String > smarts, File output, Logger logger)
Removes from the structures anyone that matches any of the given SMARTS queries.
static String getMWSlotIdentifier(Vertex frag, int slotSize)
Determines the name of the MW slot to use when comparing the given fragment with previously stored fr...
static Point3d findPointAlignedWithTmpl(AttachmentPoint ap, IAtom graphAtmSrc, IAtomContainer graphIAC, IAtomContainer masterFragIAC, IAtomContainer mol, Map< IAtom, IAtom > graphToMolMapping)
Finds the point on the master fragment that is aligned with the template geometry.
static void manageFragmentCollection(File input, FragmenterParameters settings, File output, Logger logger)
Management of fragments: includes application of fragment filters, rejection rules,...
static Vertex getRCVForAP(AttachmentPoint ap, APClass rcvApClass)
static boolean fragmentationFromGraphs(File input, FragmenterParameters settings, File output, Logger logger)
Performs fragmentation from graphs, i.e., extracts existing fragments from graphs (stored in a file).
static boolean fragmentation(File input, FragmenterParameters settings, File output, Logger logger)
Performs fragmentation according to the given cutting rules.
IAtomContainer getTemplateWithBufferShell(int bufferShellSize)
Produced a new IAtomContainer containing all the atoms needed to define the topology of the original ...
An attachment point (AP) is a possibility to attach a Vertex onto the vertex holding the AP (i....
Container for the list of vertices and the edges that connect them.
Definition: DGraph.java:104
This class represents the edge between two vertices.
Definition: Edge.java:38
Class representing a continuously connected portion of chemical object holding attachment points.
Definition: Fragment.java:61
void addAP(int atomPositionNumber)
Adds an attachment point with a dummy APClass.
Definition: Fragment.java:343
AttachmentPoint addAPOnAtom(IAtom srcAtm, APClass apc, Point3d vector)
Add an attachment point to the specifies atom.
Definition: Fragment.java:424
List< AttachmentPoint > getAttachmentPoints()
Definition: Fragment.java:1141
Fragment clone()
Returns a deep copy of this fragments.
Definition: Fragment.java:733
Iterable< IAtom > atoms()
Definition: Fragment.java:822
IAtomContainer getIAtomContainer()
Definition: Fragment.java:788
void removeAtoms(Collection< IAtom > atoms)
Removes a list of atoms and updates the list of attachment points.
Definition: Fragment.java:913
A vertex is a data structure that has an identity and holds a list of AttachmentPoints.
Definition: Vertex.java:61
ArrayList< APClass > getAllAPClasses()
Returns the list of all APClasses present on this vertex.
Definition: Vertex.java:792
void setAsRCV(boolean isRCV)
Definition: Vertex.java:274
Object getProperty(Object property)
Definition: Vertex.java:1223
abstract IAtomContainer getIAtomContainer()
void setProperty(Object key, Object property)
Definition: Vertex.java:1235
The RingClosingAttractor represent the available valence/connection that allows to close a ring.
static final HashMap< APClass, String > RCALABELPERAPCLASS
Conventional labels for attractor pseudoatom.
Utility methods for input/output.
static File writeVertexesToFile(File file, FileFormat format, List< Vertex > vertexes)
Writes vertexes to file.
static void writeSDFFile(String fileName, IAtomContainer mol)
Writes IAtomContainer to SDF file.
static File writeVertexToFile(File file, FileFormat format, Vertex vertex, boolean append)
Writes vertexes to file.
static ArrayList< DGraph > readDENOPTIMGraphsFromFile(File inFile)
Reads a list of DGraphs from file.
static ArrayList< Vertex > readVertexes(File file, Vertex.BBType bbt)
Reads Vertexes from any file that can contain such items.
An iterator that take IAtomContainers from a file, possibly using an available iterating reader,...
void close()
Close the memory-efficient iterator if any is open.
Tool to build build three-dimensional (3D) tree-like molecular structures from DGraph.
IAtomContainer convertGraphTo3DAtomContainer(DGraph graph)
Created a three-dimensional molecular representation from a given DGraph.
Logger getLogger()
Get the name of the program specific logger.
A cutting rule with three SMARTS queries (atom 1, bond, atom2) and options.
Parameters controlling execution of the fragmenter.
boolean doRejectWeirdIsotopes
Flag requesting to reject fragments with minor isotopes.
boolean addExplicitH
Flag requesting to add explicit H atoms.
Boolean satisfiesRuleOptions
Flag indicating that we have checked the additional option from the cutting rule (otherwise this flag...
Utilities for working in the Cartesian space.
static Vector3d getNormalDirection(Vector3d dir)
Generate a vector that is perpendicular to the given one.
static Vector3d getVectorFromTo(Point3d a, Point3d b)
Creates an object Vector3d that originates from point a and goes to point b.
static void rotatedVectorWAxisAngle(Vector3d v, Vector3d axis, double ang)
Rotate a vector according to a given rotation axis and angle.
Toll to add/remove dummy atoms from linearities or multi-hapto sites.
static void addDummiesOnLinearities(Fragment frag, double angLim)
Append dummy atoms on otherwise linear arrangements of atoms.
Utilities for manipulating molecular formulas.
static boolean compareFormulaAndElementalAnalysis(String formula, IAtomContainer mol)
Compares the molecular formula formatted as from the Cambridge Structural Database (CSD) against the ...
static Map< String, Double > getElementalanalysis(IAtomContainer mol)
Threads Deuterium as a different element than Hydrogen.
Container of lists of atoms matching a list of SMARTS.
Map< String, Mappings > getAllMatches()
int getNumMatchesOfQuery(String query)
Some useful math operations.
Definition: MathUtils.java:39
static double angle(Point3d a, Point3d b, Point3d c)
Calculate the angle between the 3 points.
Definition: MathUtils.java:284
Utilities for molecule conversion.
static void setZeroImplicitHydrogensToAllAtoms(IAtomContainer iac)
Sets zero implicit hydrogen count to all atoms.
static IAtomContainer makeSameAs(IAtomContainer mol)
Constructs a copy of an atom container, i.e., a molecule that reflects the one given in the input arg...
static int getDimensions(IAtomContainer mol)
Determines the dimensionality of the given chemical object.
static String getSymbolOrLabel(IAtom atm)
Gets either the elemental symbol (for standard atoms) of the label (for pseudo-atoms).
static List< Map< IAtom, IAtom > > findUniqueAtomMappings(IAtomContainer substructure, IAtomContainer mol, Logger logger)
Finds the maximum common substructure (MCS) between two molecules.
static void ensureNoUnsetBondOrders(IAtomContainer iac)
Sets bond order = single to all otherwise unset bonds.
static void explicitHydrogens(IAtomContainer mol)
Converts all the implicit hydrogens to explicit.
static Point3d getPoint3d(IAtom atm)
Return the 3D coordinates, if present.
static boolean isElement(IAtom atom)
Check element symbol corresponds to real element of Periodic Table.
Tool to generate random numbers and random decisions.
Definition: Randomizer.java:35
File formats identified by DENOPTIM.
Definition: FileFormat.java:32
The type of building block.
Definition: Vertex.java:86
FRG_PARAMS
Parameters controlling the fragmenter.