$darkmode
DENOPTIM
FragmenterTools.java
Go to the documentation of this file.
1package denoptim.fragmenter;
2
3import java.io.File;
4import java.io.FileInputStream;
5import java.io.IOException;
6import java.util.ArrayList;
7import java.util.HashMap;
8import java.util.HashSet;
9import java.util.List;
10import java.util.Map;
11import java.util.Set;
12import java.util.logging.Level;
13import java.util.logging.Logger;
14
15import javax.vecmath.Point3d;
16
17import org.openscience.cdk.Bond;
18import org.openscience.cdk.DefaultChemObjectBuilder;
19import org.openscience.cdk.PseudoAtom;
20import org.openscience.cdk.config.Isotopes;
21import org.openscience.cdk.exception.CDKException;
22import org.openscience.cdk.interfaces.IAtom;
23import org.openscience.cdk.interfaces.IAtomContainer;
24import org.openscience.cdk.interfaces.IBond;
25import org.openscience.cdk.interfaces.IIsotope;
26import org.openscience.cdk.io.iterator.IteratingSDFReader;
27import org.openscience.cdk.isomorphism.Mappings;
28import org.openscience.cdk.isomorphism.Pattern;
29import org.openscience.cdk.silent.SilentChemObjectBuilder;
30import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
31
32import denoptim.constants.DENOPTIMConstants;
33import denoptim.exception.DENOPTIMException;
34import denoptim.files.FileFormat;
35import denoptim.files.UndetectedFileFormatException;
36import denoptim.graph.APClass;
37import denoptim.graph.AttachmentPoint;
38import denoptim.graph.DGraph;
39import denoptim.graph.Edge;
40import denoptim.graph.Fragment;
41import denoptim.graph.Template;
42import denoptim.graph.Vertex;
43import denoptim.graph.Vertex.BBType;
44import denoptim.graph.rings.RingClosingAttractor;
45import denoptim.io.DenoptimIO;
46import denoptim.io.IteratingAtomContainerReader;
47import denoptim.molecularmodeling.ThreeDimTreeBuilder;
48import denoptim.programs.RunTimeParameters.ParametersType;
49import denoptim.programs.fragmenter.CuttingRule;
50import denoptim.programs.fragmenter.FragmenterParameters;
51import denoptim.programs.fragmenter.MatchedBond;
52import denoptim.utils.DummyAtomHandler;
53import denoptim.utils.FormulaUtils;
54import denoptim.utils.ManySMARTSQuery;
55import denoptim.utils.MoleculeUtils;
56import denoptim.utils.Randomizer;
57
58public class FragmenterTools
59{
60
61//------------------------------------------------------------------------------
62
/**
 * Reads structures from an SDF file and copies to the output file only those
 * for which the check involving the molecular formula and the structure
 * succeeds. Structures failing the check are reported in the log (level INFO)
 * and are not written.
 * Accepted structures are collected in a buffer that is written to the output
 * whenever it reaches 2000 entries, and once more after the input is
 * exhausted.
 *
 * @param input the SDF file to read structures from.
 * @param output the file where accepted structures are written.
 * @param logger where to report progress and rejections; may be
 * <code>null</code>, in which case nothing is logged by this method.
 * @throws DENOPTIMException declared by this method; presumably raised by the
 * writing of the output (TODO confirm against DenoptimIO).
 * @throws IOException if the input cannot be opened or the reader cannot be
 * closed.
 * @throws Error if a structure does not define the property
 * {@link DENOPTIMConstants#FORMULASTR}.
 */
77 public static void checkElementalAnalysisAgainstFormula(File input,
78 File output, Logger logger)
79 throws DENOPTIMException, IOException
80 {
81 FileInputStream fis = new FileInputStream(input);
82 IteratingSDFReader reader = new IteratingSDFReader(fis,
83 DefaultChemObjectBuilder.getInstance());
84
// 'index' is the 0-based position of the structure in the input file and is
// used only in log and error messages.
85 int index = -1;
86 int maxBufferSize = 2000;
87 ArrayList<IAtomContainer> buffer = new ArrayList<IAtomContainer>(500);
88 try {
89 while (reader.hasNext())
90 {
91 index++;
92 if (logger!=null)
93 {
94 logger.log(Level.FINE,"Checking elemental analysis of "
95 + "structure " + index);
96 }
97 IAtomContainer mol = reader.next();
// The formula is a mandatory property: without it no comparison is possible.
98 if (mol.getProperty(DENOPTIMConstants.FORMULASTR)==null)
99 {
100 throw new Error("Property '" + DENOPTIMConstants.FORMULASTR
101 + "' not found in molecule " + index + " in file "
102 + input + ". Cannot compare formula with elemental"
103 + "analysis.");
104 }
105 String formula = mol.getProperty(DENOPTIMConstants.FORMULASTR)
106 .toString();
107
// NOTE(review): the line opening this 'if' (listing line 108) is not present
// in this listing. It presumably calls a comparison utility taking 'formula',
// 'mol' and 'logger' -- confirm against the original source file.
109 mol, logger))
110 {
111 buffer.add(mol);
112 } else {
113 if (logger!=null)
114 {
115 logger.log(Level.INFO,"Inconsistency between elemental "
116 + "analysis of structure and molecular formula."
117 + " Rejecting structure " + index + ": "
118 + mol.getTitle());
119 }
120 }
121
122 // If max buffer size is reached, then bump to file
123 if (buffer.size() >= maxBufferSize)
124 {
125 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer,
126 true);
127 buffer.clear();
128 }
129 }
130 }
131 finally {
132 reader.close();
133 }
// Final flush of whatever is left in the buffer. Since the loop empties the
// buffer as soon as it reaches maxBufferSize, this condition holds whenever
// the loop terminates normally (also when the buffer is empty).
134 if (buffer.size() < maxBufferSize)
135 {
136 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer, true);
137 buffer.clear();
138 }
139 }
140
141//------------------------------------------------------------------------------
142
143
/**
 * Prepares a structure for fragmentation and reports whether the structure
 * can be used. The preparation depends on
 * {@link FragmenterParameters#addExplicitH()}.
 * If the preparation fails with a {@link CDKException} reporting that a
 * Kekulé structure cannot be assigned, the outcome depends on
 * {@link FragmenterParameters#acceptUnsetToSingeBO()}: when it is
 * <code>false</code> the structure is rejected, otherwise every bond with
 * order {@link IBond.Order#UNSET} is converted into a single bond. In both
 * cases a warning is logged via the logger of the settings.
 *
 * @param mol the structure to prepare; its bond orders may be modified.
 * @param settings the fragmentation settings, which also provide the logger.
 * @param index identifier of the structure, used only in log messages.
 * @return <code>false</code> only if kekulization failed and unset bond
 * orders are not accepted; <code>true</code> in all other cases.
 */
157 public static boolean prepareMolToFragmentation(IAtomContainer mol,
158 FragmenterParameters settings, int index)
159 {
160 try
161 {
162 if (settings.addExplicitH())
163 {
// NOTE(review): the statements in both branches (listing lines 164 and 166)
// and the one following the if-else (listing line 168) are not present in
// this listing -- confirm against the original source file.
165 } else {
167 }
169 } catch (CDKException e)
170 {
// NOTE(review): a CDKException with any other message is silently ignored
// and the method still returns true -- confirm this is intended.
171 if (e.getMessage().contains("Cannot assign Kekulé structure"))
172 {
173 if (!settings.acceptUnsetToSingeBO())
174 {
175 settings.getLogger().log(Level.WARNING,"Some bond order "
176 + "are unset and attempt to kekulize the "
177 + "system has failed "
178 + "for structure " + index + ". "
179 + "This hampers use of SMARTS queries, which "
180 + "may very well "
181 + "not work as expected. Structure " + index
182 + " will be rejected. "
183 + "You can avoid rejection by using "
184 + "keyword "
185 + ParametersType.FRG_PARAMS.getKeywordRoot()
186 + "UNSETTOSINGLEBO, but you'll "
187 + "still be using a peculiar connectivity "
188 + "table were "
189 + "many bonds are artificially marked as "
190 + "single to "
191 + "avoid use of 'UNSET' bond order. "
192 + "Further details on the problem: "
193 + e.getMessage());
194 return false;
195 } else {
196 settings.getLogger().log(Level.WARNING,"Failed "
197 + "kekulization "
198 + "for structure " + index
199 + " but UNSETTOSINGLEBO "
200 + "keyword used. Forcing use of single bonds to "
201 + "replace bonds with unset order.");
// Replace every unset bond order so that no 'UNSET' order is left.
202 for (IBond bnd : mol.bonds())
203 {
204 if (bnd.getOrder().equals(IBond.Order.UNSET))
205 {
206 bnd.setOrder(IBond.Order.SINGLE);
207 }
208 }
209 }
210 }
211 }
212 return true;
213 }
214
215//------------------------------------------------------------------------------
216
228 public static void filterStrucutresBySMARTS(File input, Set<String> smarts,
229 File output, Logger logger)
230 throws DENOPTIMException, IOException
231 {
232 FileInputStream fis = new FileInputStream(input);
233 IteratingSDFReader reader = new IteratingSDFReader(fis,
234 DefaultChemObjectBuilder.getInstance());
235
236 int i = -1;
237 Map<String, String> smartsMap = new HashMap<String, String>();
238 for (String s : smarts)
239 {
240 i++;
241 smartsMap.put("prefilter-"+i, s);
242 }
243
244 int index = -1;
245 int maxBufferSize = 2000;
246 ArrayList<IAtomContainer> buffer = new ArrayList<IAtomContainer>(500);
247 try {
248 while (reader.hasNext())
249 {
250 index++;
251 if (logger!=null)
252 {
253 logger.log(Level.FINE,"Prefiltering structure " + index);
254 }
255 IAtomContainer mol = reader.next();
256
257 ManySMARTSQuery msq = new ManySMARTSQuery(mol, smartsMap);
258 if (msq.hasProblems())
259 {
260 String msg = "WARNING! Problems while searching for "
261 + "specific atoms/bonds using SMARTS: "
262 + msq.getMessage();
263 throw new DENOPTIMException(msg,msq.getProblem());
264 }
265 Map<String, Mappings> allMatches = msq.getAllMatches();
266
267 if (allMatches.size()==0)
268 {
269 buffer.add(mol);
270 } else {
271 String hits = "";
272 for (String s : allMatches.keySet())
273 hits = hits + DenoptimIO.NL + smartsMap.get(s);
274 if (logger!=null)
275 {
276 logger.log(Level.INFO,"Found match for " + hits
277 + "Rejecting structure " + index + ": "
278 + mol.getTitle());
279 }
280 }
281
282 // If max buffer size is reached, then bump to file
283 if (buffer.size() >= maxBufferSize)
284 {
285 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer,
286 true);
287 buffer.clear();
288 }
289 }
290 } finally {
291 reader.close();
292 }
293 if (buffer.size() < maxBufferSize)
294 {
295 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer, true);
296 buffer.clear();
297 }
298 }
299
300//------------------------------------------------------------------------------
301
/**
 * Collects fragments from the vertexes of the graphs read from a file.
 * The vertexes of each graph are passed one by one through
 * {@link #manageFragmentCollection(Vertex, int, FragmenterParameters, List, Logger)},
 * and those that are kept are written to the output.
 *
 * @param input the file from which graphs are read.
 * @param settings the settings passed on to the processing of each fragment.
 * @param output the file where kept fragments are written.
 * @param logger where to report progress; may be <code>null</code> as far as
 * this method is concerned (it is passed on to the processing of fragments).
 * @return <code>true</code> if at least one fragment has been kept over all
 * the graphs.
 * @throws DENOPTIMException
 * @throws Exception
 */
317 public static boolean fragmentationFromGraphs(File input,
318 FragmenterParameters settings, File output, Logger logger)
319 throws DENOPTIMException, Exception
320 {
// Despite its name, 'totalProd' counts the fragments that survived the
// processing, not all the candidate fragments.
321 int totalProd = 0;
322 for (DGraph graph : DenoptimIO.readDENOPTIMGraphsFromFile(input))
323 {
324 List<Vertex> fragments = graph.getVertexList();
325
326 // Post-fragmentation processing of fragments
327 List<Vertex> keptFragments = new ArrayList<Vertex>();
328 int fragCounter = 0;
329 for (Vertex frag : fragments)
330 {
331 // Add metadata
// The counter is incremented before use, so fragments are numbered from 1.
332 fragCounter++;
333 manageFragmentCollection(frag, fragCounter, settings,
334 keptFragments, logger);
335 }
336 if (logger!=null)
337 {
338 logger.log(Level.FINE,"Fragments surviving post-"
339 + "processing: " + keptFragments.size());
340 }
341
342 if (keptFragments.size()>0)
343 {
344 totalProd += keptFragments.size();
// NOTE(review): the beginning of the statement that writes the kept fragments
// (listing line 345) is not present in this listing -- confirm against the
// original source file.
346 keptFragments, true);
347 }
348 }
349 return totalProd>0;
350 }
351
352//-----------------------------------------------------------------------------
353
/**
 * Fragments all the structures available from the input and processes the
 * resulting fragments. For each structure, fragmentation is driven by the
 * fragmentation templates if the settings define any, otherwise by the
 * cutting rules. Each fragment is given a title built from the name of the
 * structure it comes from and is then passed through
 * {@link #manageFragmentCollection(Vertex, int, FragmenterParameters, List, Logger)}.
 *
 * @param input the source of the structures to fragment.
 * @param settings the settings controlling fragmentation and the processing
 * of the fragments.
 * @param output the file where kept fragments are written when isomorphic
 * families are not managed.
 * @param logger where to report progress; may be <code>null</code> as far as
 * this method is concerned.
 * @return <code>false</code> if no fragment was produced or if none of the
 * produced fragments was kept; <code>true</code> otherwise.
 * @throws CDKException
 * @throws IOException
 * @throws DENOPTIMException
 * @throws IllegalArgumentException
 * @throws UndetectedFileFormatException
 */
373 public static boolean fragmentation(File input, FragmenterParameters settings,
374 File output, Logger logger) throws CDKException, IOException,
375 DENOPTIMException, IllegalArgumentException, UndetectedFileFormatException
376 {
// NOTE(review): the declaration of 'iterator' (listing lines 377-378) is not
// present in this listing; presumably it is a reader iterating over the
// structures in 'input' -- confirm against the original source file.
379
380 int totalProd = 0;
381 int totalKept = 0;
382 int index = -1;
383 try {
384 while (iterator.hasNext())
385 {
386 index++;
387 if (logger!=null)
388 {
389 logger.log(Level.FINE,"Fragmenting structure " + index);
390 }
391 IAtomContainer mol = iterator.next();
// Structures without a usable title get a name based on their position.
392 String molName = "noname-mol" + index;
393 if (mol.getTitle()!=null && !mol.getTitle().isBlank())
394 molName = mol.getTitle();
395
396 // Generate the fragments
397 List<Vertex> fragments = new ArrayList<Vertex>();
398 if (settings.getFragmentationTmpls().size()>0)
399 {
400 fragments = fragmentation(mol, settings.getFragmentationTmpls(),
401 settings.getRandomizer(), logger);
402 } else {
403 fragments = fragmentation(mol, settings.getCuttingRules(),
404 logger);
405 }
406 if (logger!=null)
407 {
408 logger.log(Level.FINE,"Fragmentation produced "
409 + fragments.size() + " fragments.");
410 }
411 totalProd += fragments.size();
412
413 // Post-fragmentation processing of fragments
414 List<Vertex> keptFragments = new ArrayList<Vertex>();
415 int fragCounter = 0;
416 for (Vertex frag : fragments)
417 {
418 // Add metadata
// The title uses the counter before the increment (0-based), while the
// processing below receives the incremented (1-based) value.
419 String fragIdStr = "From_" + molName + "_" + fragCounter;
420 frag.setProperty("cdk:Title", fragIdStr);
421 fragCounter++;
422 manageFragmentCollection(frag, fragCounter, settings,
423 keptFragments, logger);
424 }
425 if (logger!=null)
426 {
427 logger.log(Level.FINE,"Fragments surviving post-"
428 + "processing: " + keptFragments.size());
429 }
430 totalKept += keptFragments.size();
// NOTE(review): the condition tests the cumulative 'totalKept', so once any
// fragment has been kept the write is attempted also for structures that
// produced no kept fragment -- confirm this is intended.
431 if (!settings.doManageIsomorphicFamilies() && totalKept>0)
432 {
// NOTE(review): the beginning of the statement that writes the kept fragments
// (listing line 433) is not present in this listing.
434 keptFragments,true);
435 }
436 }
437 } finally {
438 iterator.close();
439 }
440
441 // Did we actually produce anything? We might not...
442 if (totalProd==0)
443 {
444 if (logger!=null)
445 {
446 logger.log(Level.WARNING,"No fragment produced. Cutting rules "
447 + "were ineffective on the given structures.");
448 }
449 return false;
450 } else if (totalKept==0)
451 {
452 if (logger!=null)
453 {
454 logger.log(Level.WARNING,"No fragment kept out of " + totalProd
455 + " produced fragments. Filtering criteria might be "
456 + "too restrictive.");
457 }
458 return false;
459 }
460 return true;
461 }
462
463//------------------------------------------------------------------------------
464
475 private static IAtomContainer reduceTemplateToVertexBoundaries(IAtomContainer templateMol)
476 {
477 // Find all boundary atoms: atoms connected to atoms with different vertex IDs
478 Set<IAtom> boundaryAtoms = new HashSet<>();
479 Map<IAtom, Long> atomToVertexId = new HashMap<>();
480
481 // First pass: collect vertex IDs and identify boundary atoms
482 for (IAtom atom : templateMol.atoms())
483 {
484 Object vidProp = atom.getProperty(DENOPTIMConstants.ATMPROPVERTEXID);
485 if (vidProp == null)
486 {
487 // If no vertex ID, keep all atoms (can't optimize)
488 return templateMol;
489 }
490 Long vid = Long.parseLong(vidProp.toString());
491 atomToVertexId.put(atom, vid);
492
493 // Check neighbors for different vertex IDs
494 List<IAtom> neighbors = templateMol.getConnectedAtomsList(atom);
495 for (IAtom neighbor : neighbors)
496 {
497 Object nbrVidProp = neighbor.getProperty(DENOPTIMConstants.ATMPROPVERTEXID);
498 if (nbrVidProp != null)
499 {
500 Long nbrVid = Long.parseLong(nbrVidProp.toString());
501 if (!vid.equals(nbrVid))
502 {
503 boundaryAtoms.add(atom);
504 boundaryAtoms.add(neighbor);
505 break;
506 }
507 }
508 }
509 }
510
511 // If no boundary atoms found, return original (all atoms have same vertex ID)
512 if (boundaryAtoms.isEmpty())
513 {
514 return templateMol;
515 }
516
517 // Find minimal chains connecting boundary atoms using BFS
518 Set<IAtom> atomsToKeep = new HashSet<>(boundaryAtoms);
519
520 // For each pair of boundary atoms, find shortest path
521 List<IAtom> boundaryList = new ArrayList<>(boundaryAtoms);
522 for (int i = 0; i < boundaryList.size(); i++)
523 {
524 for (int j = i + 1; j < boundaryList.size(); j++)
525 {
526 IAtom start = boundaryList.get(i);
527 IAtom end = boundaryList.get(j);
528
529 // Find shortest path between these boundary atoms
530 List<IAtom> path = MoleculeUtils.findShortestPath(templateMol, start, end, atomToVertexId);
531 if (path != null)
532 {
533 atomsToKeep.addAll(path);
534 }
535 }
536 }
537
538 // Create reduced molecule with only atoms to keep
539 IAtomContainer reduced = SilentChemObjectBuilder.getInstance().newAtomContainer();
540 Map<IAtom, IAtom> originalToReduced = new HashMap<>(); // original atom -> reduced atom
541
542 // Add atoms with original index stored as property
543 for (IAtom originalAtom : atomsToKeep)
544 {
545 IAtom reducedAtom = originalAtom.getBuilder().newInstance(IAtom.class, originalAtom);
546 reduced.addAtom(reducedAtom);
547 originalToReduced.put(originalAtom, reducedAtom);
548
549 // Store the original atom index as a property
550 int originalIndex = templateMol.indexOf(originalAtom);
551 reducedAtom.setProperty("DENOPTIM_ORIGINAL_ATOM_INDEX", originalIndex);
552
553 // Copy all other properties
554 for (Object key : originalAtom.getProperties().keySet())
555 {
556 if (!key.equals("DENOPTIM_ORIGINAL_ATOM_INDEX"))
557 {
558 reducedAtom.setProperty(key, originalAtom.getProperty(key));
559 }
560 }
561 }
562
563 // Add bonds between kept atoms
564 for (IBond bond : templateMol.bonds())
565 {
566 IAtom atom1 = bond.getAtom(0);
567 IAtom atom2 = bond.getAtom(1);
568
569 if (atomsToKeep.contains(atom1) && atomsToKeep.contains(atom2))
570 {
571 IBond newBond = bond.getBuilder().newInstance(IBond.class,
572 originalToReduced.get(atom1), originalToReduced.get(atom2), bond.getOrder());
573 reduced.addBond(newBond);
574 }
575 }
576
577 return reduced;
578 }
579
580
581//------------------------------------------------------------------------------
582
591 public static List<Vertex> fragmentation(IAtomContainer mol,
592 List<DGraph> templates, Randomizer randomizer, Logger logger)
593 throws DENOPTIMException
594 {
595 List<Vertex> fragments = new ArrayList<Vertex>();
596 int bestMatch = -1;
597 for (DGraph template : templates)
598 {
599 ThreeDimTreeBuilder tb = new ThreeDimTreeBuilder(logger, randomizer);
600 IAtomContainer templateMol = tb.convertGraphTo3DAtomContainer(template,
601 false, true, true);
602
603 // Optimize: create reduced templateMol for faster isomorphism search
604 // Keep full templateMol for exploreDGraphForMappings
605 IAtomContainer reducedTemplateMol = reduceTemplateToVertexBoundaries(templateMol);
606
607 List<Map<IAtom,IAtom>> atomMappings;
608
609 // Check if reduction actually happened
610 if (reducedTemplateMol == templateMol)
611 {
612 // No reduction possible, use full template directly
613 atomMappings = MoleculeUtils.findUniqueAtomMappings(templateMol, mol, logger);
614 }
615 else
616 {
617 // Use reduced template for isomorphism search (faster)
618 List<Map<IAtom,IAtom>> reducedAtomMappings = MoleculeUtils.findUniqueAtomMappings(
619 reducedTemplateMol, mol, logger);
620
621 // Convert mappings from reducedTemplateMol:mol to templateMol:mol using stored indices
622 atomMappings = new ArrayList<>();
623 for (Map<IAtom,IAtom> reducedMapping : reducedAtomMappings)
624 {
625 Map<IAtom,IAtom> fullMapping = new HashMap<>();
626 for (Map.Entry<IAtom,IAtom> entry : reducedMapping.entrySet())
627 {
628 IAtom reducedAtom = entry.getKey();
629 IAtom molAtom = entry.getValue();
630
631 // Get original atom index from reduced atom property
632 Object indexObj = reducedAtom.getProperty("DENOPTIM_ORIGINAL_ATOM_INDEX");
633 if (indexObj != null)
634 {
635 int originalIndex = ((Number) indexObj).intValue();
636 IAtom originalAtom = templateMol.getAtom(originalIndex);
637 if (originalAtom != null)
638 {
639 fullMapping.put(originalAtom, molAtom);
640 }
641 }
642 }
643 if (!fullMapping.isEmpty())
644 {
645 atomMappings.add(fullMapping);
646 }
647 }
648 }
649
650 Fragment masterFrag = new Fragment(mol, BBType.UNDEFINED);
651 IAtomContainer masterFragIAC = masterFrag.getIAtomContainer();
652
653 // For each atom mapping, replace bonds corresponding to edges
654 // (at any embedding level) with attachment pointsand annotate embedding level
655 for (Map<IAtom,IAtom> atomMapping : atomMappings)
656 {
657 try {
658 exploreDGraphForMappings(template, templateMol,
659 masterFrag, masterFragIAC, mol, atomMapping);
660 } catch (Throwable e) {
661 e.printStackTrace();
662 logger.log(Level.WARNING, "Error while exploring the template graph: " + e.getMessage());
663 continue;
664 }
665 }
666
667 // Extract isolated fragments
668 List<Vertex> locfragments = new ArrayList<Vertex>();
669 Set<Integer> doneAlready = new HashSet<Integer>();
670 for (int idx=0 ; idx<masterFrag.getAtomCount(); idx++)
671 {
672 if (doneAlready.contains(idx))
673 continue;
674
675 Fragment cloneOfMaster = masterFrag.clone();
676 IAtomContainer iac = cloneOfMaster.getIAtomContainer();
677 Set<IAtom> atmsToKeep = exploreConnectivity(iac.getAtom(idx), iac);
678 atmsToKeep.stream().forEach(atm -> doneAlready.add(iac.indexOf(atm)));
679
680 Set<IAtom> atmsToRemove = new HashSet<IAtom>();
681 for (IAtom atm : cloneOfMaster.atoms())
682 {
683 if (!atmsToKeep.contains(atm))
684 {
685 atmsToRemove.add(atm);
686 }
687 }
688 cloneOfMaster.removeAtoms(atmsToRemove);
689 if (cloneOfMaster.getAttachmentPoints().size()>0)
690 locfragments.add(cloneOfMaster);
691 }
692
693 if (locfragments.size() > bestMatch)
694 {
695 bestMatch = locfragments.size();
696 fragments = locfragments;
697 }
698 }
699 return fragments;
700 }
701
702//------------------------------------------------------------------------------
703
708 private static void exploreDGraphForMappings(DGraph graph, IAtomContainer graphIAC,
709 Fragment masterFrag, IAtomContainer masterFragIAC, IAtomContainer mol,
710 Map<IAtom,IAtom> graphToMolMapping)
711 {
712 for (Edge edge : graph.getEdgeList())
713 {
714 //TODO: what about RCVs?
715 IAtom graphAtmSrc = graphIAC.getAtom(edge.getSrcAP().getAtomPositionNumberInMol());
716 IAtom graphAtmTrg = graphIAC.getAtom(edge.getTrgAP().getAtomPositionNumberInMol());
717
718 IAtom masterFragAtmSrc = masterFragIAC.getAtom(mol.indexOf(graphToMolMapping.get(graphAtmSrc)));
719 IAtom masterFragAtmTrg = masterFragIAC.getAtom(mol.indexOf(graphToMolMapping.get(graphAtmTrg)));
720
721 IBond bnd = masterFragIAC.getBond(masterFragAtmSrc, masterFragAtmTrg);
722 if (bnd != null)
723 {
724 masterFragIAC.removeBond(bnd);
725 }
726
727 masterFrag.addAPOnAtom(masterFragAtmSrc,
728 edge.getSrcAP().getAPClass(),
729 MoleculeUtils.getPoint3d(masterFragAtmTrg));
730 masterFrag.addAPOnAtom(masterFragAtmTrg,
731 edge.getTrgAP().getAPClass(),
732 MoleculeUtils.getPoint3d(masterFragAtmSrc));
733 }
734
735 for (Vertex v : graph.getVertexList())
736 {
737 if (v instanceof Template)
738 {
739 DGraph innerGraph = ((Template) v).getInnerGraph();
740 exploreDGraphForMappings(innerGraph, graphIAC, masterFrag, masterFragIAC, mol, graphToMolMapping);
741 }
742 }
743 }
744
745//------------------------------------------------------------------------------
746
/**
 * Fragments a structure by cutting the bonds that match the given cutting
 * rules. The rules are processed in the order of the given list and each bond
 * is cut at most once: a bond already broken by a previous rule is ignored.
 * Every cut adds a pair of attachment points sharing the same cut identifier.
 * For rules flagged as "hapto", the set of bonds connecting a central atom to
 * a group of contiguous atoms is replaced by a single connection to a dummy
 * atom placed at the centroid of that group. Finally, the structure is split
 * into its disconnected parts and those bearing at least one attachment point
 * are returned.
 *
 * @param mol the structure to fragment. Fragmentation operates on a
 * {@link Fragment} created from it.
 * @param rules the cutting rules, in order of priority.
 * @param logger where to report problems.
 * NOTE(review): the logger is used without null check when a hapto system
 * with a single bond is found -- confirm that <code>null</code> is not
 * expected here.
 * @return the list of fragments, possibly empty.
 * @throws DENOPTIMException
 */
755 public static List<Vertex> fragmentation(IAtomContainer mol,
756 List<CuttingRule> rules, Logger logger) throws DENOPTIMException
757 {
758 Fragment masterFrag = new Fragment(mol,BBType.UNDEFINED);
759 IAtomContainer fragsMol = masterFrag.getIAtomContainer();
760
761 // Identify bonds
762 Map<String, List<MatchedBond>> matchingbonds =
763 FragmenterTools.getMatchingBondsAllInOne(fragsMol,rules,logger);
764
765 // Select bonds to cut and what rule to use for cutting them
// 'cutId' is incremented before each use, so cuts are numbered from 0.
766 int cutId = -1;
767 for (CuttingRule rule : rules) // NB: iterator follows rule's priority
768 {
769 String ruleName = rule.getName();
770
771 // Skip unmatched rules
772 if (!matchingbonds.keySet().contains(ruleName))
773 continue;
774
775 for (MatchedBond tb: matchingbonds.get(ruleName))
776 {
777 IAtom atmA = tb.getAtmSubClass0();
778 IAtom atmB = tb.getAtmSubClass1();
779
780 //ignore if bond already broken
781 if (!fragsMol.getConnectedAtomsList(atmA).contains(atmB))
782 {
783 continue;
784 }
785
786 //treatment of n-hapto ligands
787 if (rule.isHAPTO())
788 {
789 // Get central atom (i.e., the "mono-hapto" side,
790 // typically the metal)
791 // As a convention the central atom has subclass '0'
792 IAtom centralAtm = atmA;
793
794 // Get list of candidates for hapto-system:
795 // they have same cutting Rule and central metal
796 ArrayList<IAtom> candidatesForHapto = new ArrayList<IAtom>();
797 for (MatchedBond tbForHapto : matchingbonds.get(ruleName))
798 {
799 //Consider only bond involving same central atom
800 if (tbForHapto.getAtmSubClass0() == centralAtm)
801 candidatesForHapto.add(tbForHapto.getAtmSubClass1());
802 }
803
804 // Select atoms in n-hapto system: contiguous neighbors with
805 // same type of bond with the same central atom.
// NOTE(review): the set created here is replaced right away by the result of
// exploreHapticity, which already contains the seed atom.
806 Set<IAtom> atmsInHapto = new HashSet<IAtom>();
807 atmsInHapto.add(tb.getAtmSubClass1());
808 atmsInHapto = exploreHapticity(tb.getAtmSubClass1(),
809 centralAtm, candidatesForHapto, fragsMol);
810 if (atmsInHapto.size() == 1)
811 {
812 logger.log(Level.WARNING,"Unable to find more than one "
813 + "bond involved in high-hapticity ligand! "
814 + "Bond ignored.");
815 continue;
816 }
817
818 // Check existence of all bonds involved in multi-hapto system
819 boolean isSystemIntact = true;
820 for (IAtom ligAtm : atmsInHapto)
821 {
822 List<IAtom> nbrsOfLigAtm =
823 fragsMol.getConnectedAtomsList(ligAtm);
824 if (!nbrsOfLigAtm.contains(centralAtm))
825 {
826 isSystemIntact = false;
827 break;
828 }
829 }
830
831 // If not, it means that another rule already acted on the
832 // system thus kill this attempt without generating du-atom
833 if (!isSystemIntact)
834 continue;
835
836 // A dummy atom will be used to define attachment point of
837 // ligand with high hapticity
// The position of the dummy atom is the centroid of the atoms in the system.
838 Point3d dummyP3d = new Point3d(); //Used also for 2D
839 for (IAtom ligAtm : atmsInHapto)
840 {
841 Point3d ligP3d = MoleculeUtils.getPoint3d(ligAtm);
842 dummyP3d.x = dummyP3d.x + ligP3d.x;
843 dummyP3d.y = dummyP3d.y + ligP3d.y;
844 dummyP3d.z = dummyP3d.z + ligP3d.z;
845 }
846
847 dummyP3d.x = dummyP3d.x / (double) atmsInHapto.size();
848 dummyP3d.y = dummyP3d.y / (double) atmsInHapto.size();
849 dummyP3d.z = dummyP3d.z / (double) atmsInHapto.size();
850
851 //Add Dummy atom to molecular object
852 //if no other Du is already in the same position
853 IAtom dummyAtm = null;
854 for (IAtom oldDu : fragsMol.atoms())
855 {
// NOTE(review): the condition guarding this block (listing lines 856-857) is
// not present in this listing; presumably it selects the atoms that are
// dummy atoms -- confirm against the original source file.
858 {
859 Point3d oldDuP3d = oldDu.getPoint3d();
// Atoms closer than 0.002 are considered to be in the same position.
860 if (oldDuP3d.distance(dummyP3d) < 0.002)
861 {
862 dummyAtm = oldDu;
863 break;
864 }
865 }
866 }
867
868 if (dummyAtm==null)
869 {
870 dummyAtm = new PseudoAtom(DENOPTIMConstants.DUMMYATMSYMBOL);
871 dummyAtm.setPoint3d(dummyP3d);
872 fragsMol.addAtom(dummyAtm);
873 }
874
875 // Modify connectivity of atoms involved in high-hapticity
876 // coordination creation of Du-to-ATM bonds
877 // By internal convention the bond order is "SINGLE".
878 IBond.Order border = IBond.Order.valueOf("SINGLE");
879
880 for (IAtom ligAtm : atmsInHapto)
881 {
882 List<IAtom> nbrsOfDu = fragsMol.getConnectedAtomsList(
883 dummyAtm);
884 if (!nbrsOfDu.contains(ligAtm))
885 {
886 // Add bond with dummy
887 Bond bnd = new Bond(dummyAtm,ligAtm,border);
888 fragsMol.addBond(bnd);
889 }
890 // Remove bonds between central and coordinating atoms
891 IBond oldBnd = fragsMol.getBond(centralAtm,ligAtm);
892 fragsMol.removeBond(oldBnd);
893 }
894
895 // NB: by convention the "first" class (i.e., the ???:0 class)
896 // is always on the central atom.
897 AttachmentPoint apA = masterFrag.addAPOnAtom(centralAtm,
898 rule.getAPClass0(),
899 MoleculeUtils.getPoint3d(dummyAtm));
900 AttachmentPoint apB = masterFrag.addAPOnAtom(dummyAtm,
901 rule.getAPClass1(),
902 MoleculeUtils.getPoint3d(centralAtm));
903
904 cutId++;
905 apA.setCutId(cutId);
906 apB.setCutId(cutId);
907 } else {
908 //treatment of mono-hapto ligands
909 IBond bnd = fragsMol.getBond(atmA,atmB);
910 fragsMol.removeBond(bnd);
911
// NOTE(review): the last argument of each of the next two calls (listing
// lines 914 and 917) is not present in this listing; by analogy with the
// hapto branch it is presumably the position of the atom on the other side
// of the broken bond -- confirm against the original source file.
912 AttachmentPoint apA = masterFrag.addAPOnAtom(atmA,
913 rule.getAPClass0(),
915 AttachmentPoint apB = masterFrag.addAPOnAtom(atmB,
916 rule.getAPClass1(),
918
919 cutId++;
920 apA.setCutId(cutId);
921 apB.setCutId(cutId);
922 } //end of if (hapticity>1)
923 } //end of loop over matching bonds
924 } //end of loop over rules
925
926 // Extract isolated fragments
927 List<Vertex> fragments = new ArrayList<Vertex>();
// Indexes of the atoms already assigned to a fragment: each disconnected
// part is extracted only once, starting from its lowest-index atom.
928 Set<Integer> doneAlready = new HashSet<Integer>();
929 for (int idx=0 ; idx<masterFrag.getAtomCount(); idx++)
930 {
931 if (doneAlready.contains(idx))
932 continue;
933
// Work on a clone and remove from it everything not connected to the seed.
934 Fragment cloneOfMaster = masterFrag.clone();
935 IAtomContainer iac = cloneOfMaster.getIAtomContainer();
936 Set<IAtom> atmsToKeep = exploreConnectivity(iac.getAtom(idx), iac);
937 atmsToKeep.stream().forEach(atm -> doneAlready.add(iac.indexOf(atm)));
938
939 Set<IAtom> atmsToRemove = new HashSet<IAtom>();
940 for (IAtom atm : cloneOfMaster.atoms())
941 {
942 if (!atmsToKeep.contains(atm))
943 {
944 atmsToRemove.add(atm);
945 }
946 }
947 cloneOfMaster.removeAtoms(atmsToRemove);
// Parts without attachment points are not fragments and are discarded.
948 if (cloneOfMaster.getAttachmentPoints().size()>0)
949 fragments.add(cloneOfMaster);
950 }
951
952 return fragments;
953 }
954
955//------------------------------------------------------------------------------
969 static Set<IAtom> exploreHapticity(IAtom seed, IAtom centralAtom,
970 ArrayList<IAtom> candidates, IAtomContainer mol)
971 {
972 Set<IAtom> atmsInHapto = new HashSet<IAtom>();
973 atmsInHapto.add(seed);
974 ArrayList<IAtom> toVisitAtoms = new ArrayList<IAtom>();
975 toVisitAtoms.add(seed);
976 ArrayList<IAtom> visitedAtoms = new ArrayList<IAtom>();
977 while (toVisitAtoms.size()>0)
978 {
979 ArrayList<IAtom> toVisitLater = new ArrayList<IAtom>();
980 for (IAtom atomInFocus : toVisitAtoms)
981 {
982 if (visitedAtoms.contains(atomInFocus)
983 || atomInFocus==centralAtom)
984 continue;
985 else
986 visitedAtoms.add(atomInFocus);
987
988 if (candidates.contains(atomInFocus))
989 {
990 atmsInHapto.add(atomInFocus);
991 toVisitLater.addAll(mol.getConnectedAtomsList(atomInFocus));
992 }
993 }
994 toVisitAtoms.clear();
995 toVisitAtoms.addAll(toVisitLater);
996 }
997 return atmsInHapto;
998 }
999
1000//------------------------------------------------------------------------------
1009 static Set<IAtom> exploreConnectivity(IAtom seed, IAtomContainer mol)
1010 {
1011 Set<IAtom> atmsReachableFromSeed = new HashSet<IAtom>();
1012 ArrayList<IAtom> toVisitAtoms = new ArrayList<IAtom>();
1013 toVisitAtoms.add(seed);
1014 ArrayList<IAtom> visitedAtoms = new ArrayList<IAtom>();
1015 while (toVisitAtoms.size()>0)
1016 {
1017 ArrayList<IAtom> toVisitLater = new ArrayList<IAtom>();
1018 for (IAtom atomInFocus : toVisitAtoms)
1019 {
1020 if (visitedAtoms.contains(atomInFocus))
1021 continue;
1022 else
1023 visitedAtoms.add(atomInFocus);
1024
1025 atmsReachableFromSeed.add(atomInFocus);
1026 toVisitLater.addAll(mol.getConnectedAtomsList(atomInFocus));
1027 }
1028 toVisitAtoms.clear();
1029 toVisitAtoms.addAll(toVisitLater);
1030 }
1031 return atmsReachableFromSeed;
1032 }
1033
1034//-----------------------------------------------------------------------------
1035
1044 static Map<String, List<MatchedBond>> getMatchingBondsAllInOne(
1045 IAtomContainer mol, List<CuttingRule> rules, Logger logger)
1046 {
1047 // Collect all SMARTS queries
1048 Map<String,String> smarts = new HashMap<String,String>();
1049 for (CuttingRule rule : rules)
1050 {
1051 smarts.put(rule.getName(),rule.getWholeSMARTSRule());
1052 }
1053
1054 // Prepare a data structure for the return value
1055 Map<String, List<MatchedBond>> bondsMatchingRules =
1056 new HashMap<String, List<MatchedBond>>();
1057
1058 // Get all the matches to the SMARTS queries
1059 ManySMARTSQuery msq = new ManySMARTSQuery(mol, smarts);
1060 if (msq.hasProblems())
1061 {
1062 if (logger!=null)
1063 {
1064 logger.log(Level.WARNING, "Problem matching SMARTS: "
1065 + msq.getMessage());
1066 }
1067 return bondsMatchingRules;
1068 }
1069
1070 for (CuttingRule rule : rules)
1071 {
1072 String ruleName = rule.getName();
1073
1074 if (msq.getNumMatchesOfQuery(ruleName) == 0)
1075 {
1076 continue;
1077 }
1078
1079 // Get atoms matching cutting rule queries
1080 Mappings purgedPairs = msq.getMatchesOfSMARTS(ruleName);
1081
1082 // Evaluate subclass membership and eventually store target bonds
1083 ArrayList<MatchedBond> bondsMatched = new ArrayList<MatchedBond>();
1084 for (int[] pair : purgedPairs)
1085 {
1086 if (pair.length!=2)
1087 {
1088 throw new Error("Cutting rule: " + ruleName
1089 + " has identified " + pair.length + " atoms "
1090 + "instead of 2. Modify rule to make it find a "
1091 + "pair of atoms.");
1092 }
1093 MatchedBond tb = new MatchedBond(mol.getAtom(pair[0]),
1094 mol.getAtom(pair[1]), rule);
1095
1096 // Apply any further option of the cutting rule
1097 if (tb.satisfiesRuleOptions(logger))
1098 bondsMatched.add(tb);
1099 }
1100
1101 if (!bondsMatched.isEmpty())
1102 bondsMatchingRules.put(ruleName, bondsMatched);
1103 }
1104
1105 return bondsMatchingRules;
1106 }
1107
1108//------------------------------------------------------------------------------
1109
/**
 * Processes all the fragments found in an SDF file. Each entry of the file is
 * converted into a {@link Fragment} and passed through
 * {@link #manageFragmentCollection(Vertex, int, FragmenterParameters, List, Logger)},
 * which decides whether the fragment is added to the buffer of fragments to
 * write. The buffer is flushed whenever it reaches 2000 entries, and once
 * more after the input is exhausted.
 *
 * @param input the SDF file to read fragments from.
 * @param settings the settings controlling the processing of each fragment.
 * @param output the destination of the collected fragments.
 * @param logger where to report progress; may be <code>null</code> as far as
 * this method is concerned.
 * @throws DENOPTIMException
 * @throws IOException if the input cannot be opened or the reader cannot be
 * closed.
 * @throws IllegalArgumentException
 * @throws UndetectedFileFormatException
 */
1115 public static void manageFragmentCollection(File input,
1116 FragmenterParameters settings,
1117 File output, Logger logger) throws DENOPTIMException, IOException,
1118 IllegalArgumentException, UndetectedFileFormatException
1119 {
1120 FileInputStream fis = new FileInputStream(input);
1121 IteratingSDFReader reader = new IteratingSDFReader(fis,
1122 DefaultChemObjectBuilder.getInstance());
1123
// 'index' is the 0-based position of the fragment in the input file; it is
// also the identifier passed on to the processing of the single fragment.
1124 int index = -1;
1125 int maxBufferSize = 2000;
1126 ArrayList<Vertex> buffer = new ArrayList<Vertex>(500);
1127 try {
1128 while (reader.hasNext())
1129 {
1130 index++;
1131 if (logger!=null)
1132 {
1133 logger.log(Level.FINE,"Processing fragment " + index);
1134 }
1135 Vertex frag = new Fragment(reader.next(), BBType.UNDEFINED);
1136 manageFragmentCollection(frag, index, settings,
1137 buffer, logger);
1138
1139 // If max buffer size is reached, then bump to file
1140 if (buffer.size() >= maxBufferSize)
1141 {
// NOTE(review): the beginning of the statement that writes the buffer
// (listing line 1142) is not present in this listing -- confirm against the
// original source file.
1143 buffer, true);
1144 buffer.clear();
1145 }
1146 }
1147 } finally {
1148 reader.close();
1149 }
// Final flush of whatever is left in the buffer. Since the loop empties the
// buffer as soon as it reaches maxBufferSize, this condition holds whenever
// the loop terminates normally (also when the buffer is empty).
1150 if (buffer.size() < maxBufferSize)
1151 {
// NOTE(review): the beginning of the statement that writes the buffer
// (listing line 1152) is not present in this listing.
1153 buffer, true);
1154 buffer.clear();
1155 }
1156 }
1157
1158//------------------------------------------------------------------------------
1159
/**
 * Decides whether a single fragment is to be collected. The fragment goes
 * through these steps, and is silently dropped as soon as one step rejects
 * it:
 * <ol>
 * <li>the filters of
 * {@link #filterFragment(Fragment, FragmenterParameters, Logger)};</li>
 * <li>rejection if it is isomorphic to any of the ignorable fragments
 * defined in the settings;</li>
 * <li>rejection if target fragments are defined and it is isomorphic to
 * none of them;</li>
 * <li>optional addition of dummy atoms on linearities;</li>
 * <li>if the management of isomorphic families is requested, comparison
 * with the fragments stored in the files associated to the molecular
 * weight slot of this fragment, so that only never-seen fragments and a
 * limited sample of their isomorphic versions are retained.</li>
 * </ol>
 * Accepted fragments are appended to <code>collector</code>.
 * <p>NOTE(review): several lines of this method are missing from this
 * listing (1181, 1222, 1225, 1250, 1261, 1272, 1301); each gap is marked
 * below.</p>
 *
 * @param frag the fragment to evaluate. It is cast to {@link Fragment}
 * without any type check, so any other kind of vertex causes a
 * <code>ClassCastException</code>.
 * @param fragCounter identifier of the fragment used only in log messages.
 * @param settings the settings defining filters, ignorable/target
 * fragments, and the handling of isomorphic families.
 * @param collector the list to which accepted fragments are added.
 * @param logger where rejections are reported at level FINE. This method
 * checks it against <code>null</code> before use, but NOTE(review): the
 * same reference is forwarded to <code>filterFragment</code>, which uses
 * it without a null check.
 * @throws DENOPTIMException declared; origin not visible in these lines.
 * @throws IllegalArgumentException declared; origin not visible in these
 * lines.
 */
1177 public static void manageFragmentCollection(Vertex frag, int fragCounter,
1178 FragmenterParameters settings,
1179 List<Vertex> collector, Logger logger)
// NOTE(review): the last line of the throws clause (listing line 1181) is
// missing from this extraction.
1180 throws DENOPTIMException, IllegalArgumentException,
1182 {
1183
1184 if (!filterFragment((Fragment) frag, settings, logger))
1185 {
1186 return;
1187 }
1188
1189 //Compare with list of fragments to ignore
1190 if (settings.getIgnorableFragments().size() > 0)
1191 {
1192 if (settings.getIgnorableFragments().stream()
1193 .anyMatch(ignorable -> ((Fragment)frag)
1194 .isIsomorphicTo(ignorable)))
1195 {
1196 if (logger!=null)
1197 {
1198 logger.log(Level.FINE,"Fragment " + fragCounter
1199 + " is ignorable.");
1200 }
1201 return;
1202 }
1203 }
1204
1205 //Compare with list of fragments to retain
1206 if (settings.getTargetFragments().size() > 0)
1207 {
// NOTE(review): the lambda parameter is named 'ignorable' although here it
// iterates over the target fragments (i.e., those to retain).
1208 if (!settings.getTargetFragments().stream()
1209 .anyMatch(ignorable -> ((Fragment)frag)
1210 .isIsomorphicTo(ignorable)))
1211 {
1212 if (logger!=null)
1213 {
1214 logger.log(Level.FINE,"Fragment " + fragCounter
1215 + " doesn't match any target: rejected.");
1216 }
1217 return;
1218 }
1219 }
1220
1221 // Add dummy atoms on linearities
// NOTE(review): the line opening this 'if' (listing line 1222) and the one
// opening the call in its body (listing line 1225) are missing from this
// extraction.
1223 && settings.doAddDuOnLinearity())
1224 {
1226 settings.getLinearAngleLimit());
1227 }
1228
1229 // Management of duplicate fragments:
1230 // -> identify duplicates (isomorphic fragments),
1231 // -> keep one (or more, if we want to sample the isomorphs),
1232 // -> reject the rest.
1233 if (settings.doManageIsomorphicFamilies())
1234 {
// The whole read-compare-write sequence on the files of the MW slot, and
// the update of the counts of isomorphs, run while holding this lock.
1235 synchronized (settings.MANAGEMWSLOTSSLOCK)
1236 {
1237 String mwSlotID = getMWSlotIdentifier(frag,
1238 settings.getMWSlotSize());
1239
// Two files per MW slot: one for the unique fragments (first
// representative of each isomorphic family) and one for all the retained
// fragments.
1240 File mwFileUnq = settings.getMWSlotFileNameUnqFrags(
1241 mwSlotID);
1242 File mwFileAll = settings.getMWSlotFileNameAllFrags(
1243 mwSlotID);
1244
1245 // Compare this fragment with previously seen ones
1246 Vertex unqVersion = null;
1247 if (mwFileUnq.exists())
1248 {
// NOTE(review): the right-hand side of this assignment (listing line 1250)
// is missing from this extraction; presumably it reads the vertexes from
// mwFileUnq - TODO confirm.
1249 ArrayList<Vertex> knownFrags =
1251 unqVersion = knownFrags.stream()
1252 .filter(knownFrag ->
1253 ((Fragment)frag).isIsomorphicTo(knownFrag))
1254 .findAny()
1255 .orElse(null);
1256 }
1257 if (unqVersion!=null)
1258 {
1259 // Identify this unique fragment
// NOTE(review): the property key (listing line 1261) is missing from this
// extraction. If the property is not set, toString() is called on null.
1260 String isoFamID = unqVersion.getProperty(
1262 .toString();
1263
1264 // Do we already have enough isomorphic family members
1265 // for this fragment?
// NOTE(review): the value returned by get(...) is unboxed into an int, so
// a family ID with no registered count causes a NullPointerException (e.g.,
// presumably, when the file of unique fragments comes from a previous run).
1266 int sampleSize = settings.getIsomorphsCount()
1267 .get(isoFamID);
1268 if (sampleSize < settings.getIsomorphicSampleSize())
1269 {
1270 // Add this isomorphic version to the sample
// NOTE(review): the property key (listing line 1272) is missing from this
// extraction.
1271 frag.setProperty(
1273 isoFamID);
1274 settings.getIsomorphsCount().put(isoFamID,
1275 sampleSize+1);
1276 DenoptimIO.writeVertexToFile(mwFileAll,
1277 FileFormat.VRTXSDF, frag, true);
1278 collector.add(frag);
1279 } else {
1280 // This would be inefficient in the long run
1281 // because it by-passes the splitting by MW.
1282 // Do not do it!
1283 /*
1284 if (logger!=null)
1285 {
1286 logger.log(Level.FINE,"Fragment "
1287 + fragCounter
1288 + " is isomorphic to unique fragment "
1289 + unqVersionID + ", but we already "
1290 + "have a sample of " + sampleSize
1291 + ": ignoring this fragment from now "
1292 + "on.");
1293 }
1294 settings.getIgnorableFragments().add(frag);
1295 */
// The sample of this family is already complete: the fragment is dropped
// without any log message.
1296 }
1297 } else {
1298 // This is a never-seen fragment
// NOTE(review): the property key (listing line 1301) is missing from this
// extraction.
1299 String isoFamID = settings.newIsomorphicFamilyID();
1300 frag.setProperty(
1302 isoFamID);
1303 settings.getIsomorphsCount().put(isoFamID, 1);
// The founder of a new family is recorded in both files of the MW slot.
1304 DenoptimIO.writeVertexToFile(mwFileUnq,
1305 FileFormat.VRTXSDF, frag, true);
1306 DenoptimIO.writeVertexToFile(mwFileAll,
1307 FileFormat.VRTXSDF, frag, true);
1308 collector.add(frag);
1309 }
1310 } // end synchronized block
1311 } else {
1312 //If we are here, we did not ask to remove duplicates
1313 collector.add(frag);
1314 }
1315 }
1316
1317//------------------------------------------------------------------------------
1318
1328 public static boolean filterFragment(Fragment frag,
1329 FragmenterParameters settings)
1330 {
1331 return filterFragment(frag, settings, settings.getLogger());
1332 }
1333
1334//------------------------------------------------------------------------------
1335
/**
 * Filters a fragment according to the criteria defined in the settings.
 * The criteria are evaluated in sequence, and the first one that is not
 * satisfied makes this method log a message and return <code>false</code>:
 * <ol>
 * <li>presence of any atom that is neither an element nor the dummy atom
 * (always checked);</li>
 * <li>any atom placed on the direction vector of an attachment point
 * (only when working in 3D);</li>
 * <li>isotopes other than the major one (upon request);</li>
 * <li>rejected elements;</li>
 * <li>formula-based "more-than" and "less-than" criteria;</li>
 * <li>rejected APClasses and combinations of APClasses (matched by
 * prefix);</li>
 * <li>maximum and minimum number of heavy atoms;</li>
 * <li>SMARTS-based rejection and retention criteria.</li>
 * </ol>
 * <p>NOTE(review): lines 1548, 1574 and 1596 of the original file are
 * missing from this listing; each gap is marked below.</p>
 *
 * @param frag the fragment to evaluate.
 * @param settings the settings defining the filtering criteria.
 * @param logger where rejections are reported. It is used without any
 * null check, so it must not be <code>null</code>.
 * @return <code>true</code> if the fragment satisfies all the criteria,
 * <code>false</code> if it is to be rejected.
 */
1347 public static boolean filterFragment(Fragment frag,
1348 FragmenterParameters settings, Logger logger)
1349 {
1350 // Default filtering criteria: get rid of R/*/X/Xx
1351 for (IAtom atm : frag.atoms())
1352 {
1353 if (MoleculeUtils.isElement(atm))
1354 {
1355 continue;
1356 }
// Among the non-element atoms only the dummy atom is tolerated.
1357 String smb = MoleculeUtils.getSymbolOrLabel(atm);
1358 if (DENOPTIMConstants.DUMMYATMSYMBOL.equals(smb))
1359 {
1360 continue;
1361 }
1362 logger.log(Level.FINE,"Removing fragment contains non-element '"
1363 + smb + "'");
1364 return false;
1365 }
1366
1367 if (settings.isWorkingIn3D())
1368 {
1369 // Incomplete 3D fragmentation: an atom has the same coords of an AP.
1370 for (AttachmentPoint ap : frag.getAttachmentPoints())
1371 {
1372 Point3d ap3d = ap.getDirectionVector();
1373 if (ap3d!=null)
1374 {
1375 for (IAtom atm : frag.atoms())
1376 {
// NOTE(review): atm3d is not checked against null before computing the
// distance - confirm that getPoint3d never returns null here.
1377 Point3d atm3d = MoleculeUtils.getPoint3d(atm);
1378 double dist = ap3d.distance(atm3d);
1379 if (dist < 0.0002)
1380 {
// NOTE(review): the number printed right after "AP" is the index of the
// atom in the atom container, not an index of the attachment point.
1381 logger.log(Level.FINE,"Removing fragment with AP"
1382 + frag.getIAtomContainer().indexOf(atm)
1383 + " and atom " + MoleculeUtils.getSymbolOrLabel(atm)
1384 + " coincide.");
1385 return false;
1386 }
1387 }
1388 }
1389 }
1390 }
1391 if (settings.doRejectWeirdIsotopes())
1392 {
1393 for (IAtom atm : frag.atoms())
1394 {
1395 if (MoleculeUtils.isElement(atm))
1396 {
1397 // Unconfigured isotope has null mass number
1398 if (atm.getMassNumber() == null)
1399 continue;
1400
1401 String symb = MoleculeUtils.getSymbolOrLabel(atm);
1402 int a = atm.getMassNumber();
1403 try {
1404 IIsotope major = Isotopes.getInstance().getMajorIsotope(symb);
1405 if (a != major.getMassNumber())
1406 {
1407 logger.log(Level.FINE,"Removing fragment containing "
1408 + "isotope "+symb+a+".");
1409 return false;
1410 }
1411 } catch (Throwable t) {
// Best-effort check: any failure is only logged and the atom is then
// treated as acceptable. NOTE(review): the two string literals are joined
// without a space, so the message reads "Isotopedetection."; catching
// Throwable also hides errors of any kind.
1412 logger.log(Level.WARNING,"Not able to perform Isotope"
1413 + "detection.");
1414 }
1415 }
1416
1417 }
1418 }
1419
1420 // User-controlled filtering criteria
1421
1422 if (settings.getRejectedElements().size() > 0)
1423 {
1424 for (IAtom atm : frag.atoms())
1425 {
1426 String symb = MoleculeUtils.getSymbolOrLabel(atm);
1427 if (settings.getRejectedElements().contains(symb))
1428 {
1429 logger.log(Level.FINE,"Removing fragment containing '"
1430 + symb + "'.");
1431 return false;
1432 }
1433 }
1434 }
1435
1436 if (settings.getRejectedFormulaLessThan().size() > 0
1437 || settings.getRejectedFormulaMoreThan().size() > 0)
1438 {
// Elemental analysis of the fragment: number of atoms per element symbol.
1439 Map<String,Double> eaMol = FormulaUtils.getElementalanalysis(
1440 frag.getIAtomContainer());
1441
// Each "more-than" criterion is evaluated on its own: exceeding the count
// given for any element of any criterion rejects the fragment. Elements
// that are absent from the fragment cannot trigger this rejection.
1442 for (Map<String,Double> criterion :
1443 settings.getRejectedFormulaMoreThan())
1444 {
1445 for (String el : criterion.keySet())
1446 {
1447 if (eaMol.containsKey(el))
1448 {
1449 // -0.5 to make it strictly less-than
// I.e., reject only if the count exceeds the criterion by more than 0.5.
1450 if (eaMol.get(el) - criterion.get(el) > 0.5)
1451 {
1452 logger.log(Level.FINE,"Removing fragment that "
1453 + "contains too much '" + el + "' "
1454 + "as requested by formula"
1455 + "-based (more-than) settings (" + el
1456 + eaMol.get(el) + " > " + criterion + ").");
1457 return false;
1458 }
1459 }
1460 }
1461 }
1462
// Unlike the "more-than" criteria, the "less-than" criterion is a single
// map, and an element that is absent from the fragment does cause rejection.
1463 Map<String,Double> criterion = settings.getRejectedFormulaLessThan();
1464 for (String el : criterion.keySet())
1465 {
1466 if (!eaMol.containsKey(el))
1467 {
1468 logger.log(Level.FINE,"Removing fragment that does not "
1469 + "contain '" + el + "' as requested by formula"
1470 + "-based (less-than) settings.");
1471 return false;
1472 } else {
1473 // 0.5 to make it strictly more-than
// I.e., reject only if the count is below the criterion by more than 0.5.
1474 if (eaMol.get(el) - criterion.get(el) < -0.5)
1475 {
1476 logger.log(Level.FINE,"Removing fragment that "
1477 + "contains too little '" + el + "' "
1478 + "as requested by formula"
1479 + "-based settings (" + el
1480 + eaMol.get(el) + " < " + criterion + ").");
1481 return false;
1482 }
1483 }
1484 }
1485
1486 }
1487
1488 if (settings.getRejectedAPClasses().size() > 0)
1489 {
1490 for (APClass apc : frag.getAllAPClasses())
1491 {
1492 for (String s : settings.getRejectedAPClasses())
1493 {
// Prefix match: a rejected string matches every APClass whose string
// representation starts with it.
1494 if (apc.toString().startsWith(s))
1495 {
1496 logger.log(Level.FINE,"Removing fragment with APClass "
1497 + apc);
1498 return false;
1499 }
1500 }
1501 }
1502 }
1503
1504 if (settings.getRejectedAPClassCombinations().size() > 0)
1505 {
// A combination rejects the fragment only if each of its conditions is
// matched (by prefix) by at least one APClass of the fragment.
1506 loopOverCombinations:
1507 for (String[] conditions : settings.getRejectedAPClassCombinations())
1508 {
1509 for (int ip=0; ip<conditions.length; ip++)
1510 {
1511 String condition = conditions[ip];
1512 boolean found = false;
1513 for (APClass apc : frag.getAllAPClasses())
1514 {
1515 if (apc.toString().startsWith(condition))
1516 {
1517 found = true;
// NOTE(review): this 'continue' is the last statement of the loop body, so
// it has no effect: the remaining APClasses are still scanned. A 'break'
// would stop at the first match.
1518 continue;
1519 }
1520 }
1521 if (!found)
1522 continue loopOverCombinations;
1523 // Here we do have at least one AP satisfying the condition.
1524 }
1525 // Here we managed to satisfy all conditions. Therefore, we can
1526 // reject this fragment
1527
1528 String allCondsAsString = "";
1529 for (int i=0; i<conditions.length; i++)
1530 allCondsAsString = allCondsAsString + " " + conditions[i];
1531
1532 logger.log(Level.FINE,"Removing fragment with combination of "
1533 + "APClasses matching '" + allCondsAsString + "'.");
1534 return false;
1535 }
1536 }
1537
// A limit that is not positive is considered as not set.
1538 if (settings.getMaxFragHeavyAtomCount()>0
1539 || settings.getMinFragHeavyAtomCount()>0)
1540 {
1541 int totHeavyAtm = 0;
1542 for (IAtom atm : frag.atoms())
1543 {
1544 if (MoleculeUtils.isElement(atm))
1545 {
1546 String symb = MoleculeUtils.getSymbolOrLabel(atm);
// NOTE(review): the second line of this condition (listing line 1548) is
// missing from this extraction; the visible part excludes "H".
1547 if ((!symb.equals("H")) && (!symb.equals(
1549 totHeavyAtm++;
1550 }
1551 }
1552 if (settings.getMaxFragHeavyAtomCount() > 0
1553 && totHeavyAtm > settings.getMaxFragHeavyAtomCount())
1554 {
// NOTE(review): the message prints " < " although this branch is taken
// when the count is greater than the maximum; " > " was likely intended.
1555 logger.log(Level.FINE,"Removing fragment with too many atoms ("
1556 + totHeavyAtm + " < "
1557 + settings.getMaxFragHeavyAtomCount()
1558 + ")");
1559 return false;
1560 }
1561 if (settings.getMinFragHeavyAtomCount() > 0
1562 && totHeavyAtm < settings.getMinFragHeavyAtomCount())
1563 {
1564 logger.log(Level.FINE,"Removing fragment with too few atoms ("
1565 + totHeavyAtm + " < "
1566 + settings.getMinFragHeavyAtomCount()
1567 + ")");
1568 return false;
1569 }
1570 }
1571
1572 if (settings.getFragRejectionSMARTS().size() > 0)
1573 {
// NOTE(review): the line declaring 'msq' (listing line 1574) is missing
// from this extraction.
1575 settings.getFragRejectionSMARTS());
// Problems in the evaluation of the SMARTS are only reported: the
// per-query counts are inspected anyway.
1576 if (msq.hasProblems())
1577 {
1578 logger.log(Level.WARNING,"Problems evaluating SMARTS-based "
1579 + "rejection criteria. " + msq.getMessage());
1580 }
1581
// Matching any single rejection query is sufficient to reject.
1582 for (String criterion : settings.getFragRejectionSMARTS().keySet())
1583 {
1584 if (msq.getNumMatchesOfQuery(criterion)>0)
1585 {
1586 logger.log(Level.FINE,"Removing fragment that matches "
1587 + "SMARTS-based rejection criteria '" + criterion
1588 + "'.");
1589 return false;
1590 }
1591 }
1592 }
1593
1594 if (settings.getFragRetentionSMARTS().size() > 0)
1595 {
// NOTE(review): the line declaring 'msq' (listing line 1596) is missing
// from this extraction.
1597 settings.getFragRetentionSMARTS());
1598 if (msq.hasProblems())
1599 {
// NOTE(review): this message mentions "rejection criteria" although the
// queries evaluated here are the retention ones.
1600 logger.log(Level.WARNING,"Problems evaluating SMARTS-based "
1601 + "rejection criteria. " + msq.getMessage());
1602 }
1603
// Matching any single retention query is sufficient to keep the fragment.
1604 boolean matchesAny = false;
1605 for (String criterion : settings.getFragRetentionSMARTS().keySet())
1606 {
1607 if (msq.getNumMatchesOfQuery(criterion) > 0)
1608 {
1609 matchesAny = true;
1610 break;
1611 }
1612 }
1613 if (!matchesAny)
1614 {
1615 logger.log(Level.FINE,"Removing fragment that does not "
1616 + "match any SMARTS-based retention criteria.");
1617 return false;
1618 }
1619 }
1620 return true;
1621 }
1622
1623//------------------------------------------------------------------------------
1624
1632 public static String getMWSlotIdentifier(Vertex frag, int slotSize)
1633 {
1634 for (IAtom a : frag.getIAtomContainer().atoms())
1635 {
1636 if (a.getImplicitHydrogenCount()==null)
1637 a.setImplicitHydrogenCount(0);
1638 }
1639 double mw = AtomContainerManipulator.getMass(frag.getIAtomContainer());
1640 int slotNum = (int) (mw / (Double.valueOf(slotSize)));
1641 return slotNum*slotSize + "-" + (slotNum+1)*slotSize;
1642 }
1643
1644//------------------------------------------------------------------------------
1645
1646 public static Vertex getRCVForAP(AttachmentPoint ap, APClass rcvApClass)
1647 throws DENOPTIMException
1648 {
1649 IAtomContainer mol = SilentChemObjectBuilder.getInstance()
1650 .newAtomContainer();
1651 Point3d apv = ap.getDirectionVector();
1652 mol.addAtom(new PseudoAtom(RingClosingAttractor.RCALABELPERAPCLASS.get(
1653 rcvApClass),
1654 new Point3d(
1655 Double.valueOf(apv.x),
1656 Double.valueOf(apv.y),
1657 Double.valueOf(apv.z))));
1658
1659 Fragment rcv = new Fragment(mol, BBType.FRAGMENT);
1660 rcv.setAsRCV(true);
1661
1662 Point3d aps = MoleculeUtils.getPoint3d(
1663 ap.getOwner().getIAtomContainer().getAtom(
1664 ap.getAtomPositionNumber()));
1665 rcv.addAP(0, rcvApClass, new Point3d(
1666 Double.valueOf(aps.x),
1667 Double.valueOf(aps.y),
1668 Double.valueOf(aps.z)));
1669 return rcv;
1670 }
1671
1672//------------------------------------------------------------------------------
1673
1674}
General set of constants used in DENOPTIM.
static final Object FORMULASTR
Property name used to store molecular formula as string in an atom container.
static final String ATMPROPVERTEXID
String tag of Atom property used to store the unique ID of the Vertex corresponding to the molecular ...
static final String DUMMYATMSYMBOL
Symbol of dummy atom.
static final Object ISOMORPHICFAMILYID
Property used to store the identifier of the family of isomorphic fragments that owns a fragment.
Exception thrown when the format of a file is not recognized.
static void exploreDGraphForMappings(DGraph graph, IAtomContainer graphIAC, Fragment masterFrag, IAtomContainer masterFragIAC, IAtomContainer mol, Map< IAtom, IAtom > graphToMolMapping)
Recursive function to process edges at any level of embedding to remove the corresponding bonds and c...
static List< Vertex > fragmentation(IAtomContainer mol, List< DGraph > templates, Randomizer randomizer, Logger logger)
Chops one chemical structure by applying the given fragmentation templates.
static Map< String, List< MatchedBond > > getMatchingBondsAllInOne(IAtomContainer mol, List< CuttingRule > rules, Logger logger)
Identification of the bonds matching a list of SMARTS queries.
static void checkElementalAnalysisAgainstFormula(File input, File output, Logger logger)
Processes all molecules analyzing the composition of the structure in the chemical representation as ...
static List< Vertex > fragmentation(IAtomContainer mol, List< CuttingRule > rules, Logger logger)
Chops one chemical structure by applying the given cutting rules.
static boolean filterFragment(Fragment frag, FragmenterParameters settings)
Filter fragments according to the criteria defined in the settings.
static Set< IAtom > exploreConnectivity(IAtom seed, IAtomContainer mol)
Explores the connectivity annotating which atoms have been visited.
static IAtomContainer reduceTemplateToVertexBoundaries(IAtomContainer templateMol)
Reduces a template molecule to keep only atoms at vertex boundaries (atoms connected to atoms with di...
static boolean prepareMolToFragmentation(IAtomContainer mol, FragmenterParameters settings, int index)
Do any pre-processing on a IAtomContainer meant to be fragmented.
static void manageFragmentCollection(Vertex frag, int fragCounter, FragmenterParameters settings, List< Vertex > collector, Logger logger)
Management of fragments: includes application of fragment filters, rejection rules,...
static Set< IAtom > exploreHapticity(IAtom seed, IAtom centralAtom, ArrayList< IAtom > candidates, IAtomContainer mol)
Identifies non-central atoms involved in the same n-hapto ligand as the seed atom.
static boolean filterFragment(Fragment frag, FragmenterParameters settings, Logger logger)
Filter fragments according to the criteria defined in the settings.
static void filterStrucutresBySMARTS(File input, Set< String > smarts, File output, Logger logger)
Removes any structure that matches any of the given SMARTS queries.
static String getMWSlotIdentifier(Vertex frag, int slotSize)
Determines the name of the MW slot to use when comparing the given fragment with previously stored fr...
static void manageFragmentCollection(File input, FragmenterParameters settings, File output, Logger logger)
Management of fragments: includes application of fragment filters, rejection rules,...
static Vertex getRCVForAP(AttachmentPoint ap, APClass rcvApClass)
static boolean fragmentationFromGraphs(File input, FragmenterParameters settings, File output, Logger logger)
Performs fragmentation from graphs, i.e., extracts existing fragments from graphs (stored in a file).
static boolean fragmentation(File input, FragmenterParameters settings, File output, Logger logger)
Performs fragmentation according to the given cutting rules.
An attachment point (AP) is a possibility to attach a Vertex onto the vertex holding the AP (i....
Container for the list of vertices and the edges that connect them.
Definition: DGraph.java:102
List< Vertex > getVertexList()
Definition: DGraph.java:947
List< Edge > getEdgeList()
Definition: DGraph.java:992
This class represents the edge between two vertices.
Definition: Edge.java:38
Class representing a continuously connected portion of chemical object holding attachment points.
Definition: Fragment.java:61
void addAP(int atomPositionNumber)
Adds an attachment point with a dummy APClass.
Definition: Fragment.java:343
AttachmentPoint addAPOnAtom(IAtom srcAtm, APClass apc, Point3d vector)
Adds an attachment point to the specified atom.
Definition: Fragment.java:424
List< AttachmentPoint > getAttachmentPoints()
Definition: Fragment.java:1141
Fragment clone()
Returns a deep copy of this fragment.
Definition: Fragment.java:733
Iterable< IAtom > atoms()
Definition: Fragment.java:822
IAtomContainer getIAtomContainer()
Definition: Fragment.java:788
void removeAtoms(Collection< IAtom > atoms)
Removes a list of atoms and updates the list of attachment points.
Definition: Fragment.java:913
A vertex is a data structure that has an identity and holds a list of AttachmentPoints.
Definition: Vertex.java:61
ArrayList< APClass > getAllAPClasses()
Returns the list of all APClasses present on this vertex.
Definition: Vertex.java:792
void setAsRCV(boolean isRCV)
Definition: Vertex.java:274
Object getProperty(Object property)
Definition: Vertex.java:1223
abstract IAtomContainer getIAtomContainer()
void setProperty(Object key, Object property)
Definition: Vertex.java:1235
The RingClosingAttractor represents the available valence/connection that allows closing a ring.
static final HashMap< APClass, String > RCALABELPERAPCLASS
Conventional labels for attractor pseudoatom.
Utility methods for input/output.
static File writeVertexesToFile(File file, FileFormat format, List< Vertex > vertexes)
Writes vertexes to file.
static void writeSDFFile(String fileName, IAtomContainer mol)
Writes IAtomContainer to SDF file.
static File writeVertexToFile(File file, FileFormat format, Vertex vertex, boolean append)
Writes vertexes to file.
static ArrayList< DGraph > readDENOPTIMGraphsFromFile(File inFile)
Reads a list of DGraphs from file.
static ArrayList< Vertex > readVertexes(File file, Vertex.BBType bbt)
Reads Vertexes from any file that can contain such items.
An iterator that takes IAtomContainers from a file, possibly using an available iterating reader,...
void close()
Close the memory-efficient iterator if any is open.
Tool to build three-dimensional (3D) tree-like molecular structures from DGraph.
IAtomContainer convertGraphTo3DAtomContainer(DGraph graph)
Creates a three-dimensional molecular representation from a given DGraph.
Logger getLogger()
Get the name of the program specific logger.
A cutting rule with three SMARTS queries (atom 1, bond, atom2) and options.
Parameters controlling execution of the fragmenter.
boolean doRejectWeirdIsotopes
Flag requesting to reject fragments with minor isotopes.
boolean addExplicitH
Flag requesting to add explicit H atoms.
Boolean satisfiesRuleOptions
Flag indicating that we have checked the additional option from the cutting rule (otherwise this flag...
Tool to add/remove dummy atoms from linearities or multi-hapto sites.
static void addDummiesOnLinearities(Fragment frag, double angLim)
Append dummy atoms on otherwise linear arrangements of atoms.
Utilities for manipulating molecular formulas.
static boolean compareFormulaAndElementalAnalysis(String formula, IAtomContainer mol)
Compares the molecular formula formatted as from the Cambridge Structural Database (CSD) against the ...
static Map< String, Double > getElementalanalysis(IAtomContainer mol)
Treats Deuterium as a different element than Hydrogen.
Container of lists of atoms matching a list of SMARTS.
Map< String, Mappings > getAllMatches()
int getNumMatchesOfQuery(String query)
Utilities for molecule conversion.
static void setZeroImplicitHydrogensToAllAtoms(IAtomContainer iac)
Sets zero implicit hydrogen count to all atoms.
static int getDimensions(IAtomContainer mol)
Determines the dimensionality of the given chemical object.
static String getSymbolOrLabel(IAtom atm)
Gets either the elemental symbol (for standard atoms) or the label (for pseudo-atoms).
static List< Map< IAtom, IAtom > > findUniqueAtomMappings(IAtomContainer substructure, IAtomContainer mol, Logger logger)
Finds the maximum common substructure (MCS) between two molecules.
static void ensureNoUnsetBondOrders(IAtomContainer iac)
Sets bond order = single to all otherwise unset bonds.
static void explicitHydrogens(IAtomContainer mol)
Converts all the implicit hydrogens to explicit.
static List< IAtom > findShortestPath(IAtomContainer mol, IAtom start, IAtom end, Map< IAtom, Long > atomToVertexId)
Finds the shortest path between two atoms in a molecule using BFS.
static Point3d getPoint3d(IAtom atm)
Return the 3D coordinates, if present.
static boolean isElement(IAtom atom)
Check element symbol corresponds to real element of Periodic Table.
Tool to generate random numbers and random decisions.
Definition: Randomizer.java:35
File formats identified by DENOPTIM.
Definition: FileFormat.java:32
The type of building block.
Definition: Vertex.java:86
FRG_PARAMS
Parameters controlling the fragmenter.