$darkmode
DENOPTIM
FragmenterTools.java
Go to the documentation of this file.
1package denoptim.fragmenter;
2
3import java.io.File;
4import java.io.FileInputStream;
5import java.io.IOException;
6import java.util.ArrayList;
7import java.util.HashMap;
8import java.util.HashSet;
9import java.util.List;
10import java.util.Map;
11import java.util.Set;
12import java.util.logging.Level;
13import java.util.logging.Logger;
14
15import javax.vecmath.Point3d;
16
17import org.openscience.cdk.Bond;
18import org.openscience.cdk.DefaultChemObjectBuilder;
19import org.openscience.cdk.PseudoAtom;
20import org.openscience.cdk.config.Isotopes;
21import org.openscience.cdk.exception.CDKException;
22import org.openscience.cdk.interfaces.IAtom;
23import org.openscience.cdk.interfaces.IAtomContainer;
24import org.openscience.cdk.interfaces.IBond;
25import org.openscience.cdk.interfaces.IIsotope;
26import org.openscience.cdk.io.iterator.IteratingSDFReader;
27import org.openscience.cdk.isomorphism.Mappings;
28import org.openscience.cdk.silent.SilentChemObjectBuilder;
29import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
30
31import denoptim.constants.DENOPTIMConstants;
32import denoptim.exception.DENOPTIMException;
33import denoptim.files.FileFormat;
34import denoptim.files.UndetectedFileFormatException;
35import denoptim.graph.APClass;
36import denoptim.graph.AttachmentPoint;
37import denoptim.graph.Fragment;
38import denoptim.graph.Vertex;
39import denoptim.graph.Vertex.BBType;
40import denoptim.graph.rings.RingClosingAttractor;
41import denoptim.io.DenoptimIO;
42import denoptim.io.IteratingAtomContainerReader;
43import denoptim.programs.RunTimeParameters.ParametersType;
44import denoptim.programs.fragmenter.CuttingRule;
45import denoptim.programs.fragmenter.FragmenterParameters;
46import denoptim.programs.fragmenter.MatchedBond;
47import denoptim.utils.DummyAtomHandler;
48import denoptim.utils.FormulaUtils;
49import denoptim.utils.ManySMARTSQuery;
50import denoptim.utils.MoleculeUtils;
51
52public class FragmenterTools
53{
54
55//------------------------------------------------------------------------------
56
/**
 * Reads all the structures found in an SDF file and writes to the output
 * file only those accepted by the check performed on each of them.
 * Each structure must carry the property
 * {@link DENOPTIMConstants#FORMULASTR}: a structure lacking it makes this
 * method throw an {@link Error}.
 * NOTE(review): the line holding the condition that decides between keeping
 * and rejecting a structure (original line 102) is not visible in this
 * listing. Judging from the log messages it presumably compares the formula
 * with the elemental analysis of the structure - confirm against the real
 * source file.
 *
 * @param input the SDF file from which structures are read.
 * @param output the file where the accepted structures are written.
 * @param logger where progress and rejections are reported. Can be
 * <code>null</code>, in which case this method does not log anything.
 * @throws DENOPTIMException
 * @throws IOException
 */
71 public static void checkElementalAnalysisAgainstFormula(File input,
72 File output, Logger logger)
73 throws DENOPTIMException, IOException
74 {
75 FileInputStream fis = new FileInputStream(input);
76 IteratingSDFReader reader = new IteratingSDFReader(fis,
77 DefaultChemObjectBuilder.getInstance());
78
// 0-based position of the structure in the input; used only in messages.
79 int index = -1;
// Accepted structures are collected in memory and written in batches.
80 int maxBufferSize = 2000;
81 ArrayList<IAtomContainer> buffer = new ArrayList<IAtomContainer>(500);
82 try {
83 while (reader.hasNext())
84 {
85 index++;
86 if (logger!=null)
87 {
88 logger.log(Level.FINE,"Checking elemental analysis of "
89 + "structure " + index);
90 }
91 IAtomContainer mol = reader.next();
// NOTE(review): the message below is assembled without a space between
// "elemental" and "analysis.".
92 if (mol.getProperty(DENOPTIMConstants.FORMULASTR)==null)
93 {
94 throw new Error("Property '" + DENOPTIMConstants.FORMULASTR
95 + "' not found in molecule " + index + " in file "
96 + input + ". Cannot compare formula with elemental"
97 + "analysis.");
98 }
99 String formula = mol.getProperty(DENOPTIMConstants.FORMULASTR)
100 .toString();
101
// NOTE(review): the first line of the following 'if' statement (original
// line 102) is missing from this listing.
103 mol, logger))
104 {
105 buffer.add(mol);
106 } else {
107 if (logger!=null)
108 {
109 logger.log(Level.INFO,"Inconsistency between elemental "
110 + "analysis of structure and molecular formula."
111 + " Rejecting structure " + index + ": "
112 + mol.getTitle());
113 }
114 }
115
116 // If max buffer size is reached, then bump to file
117 if (buffer.size() >= maxBufferSize)
118 {
119 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer,
120 true);
121 buffer.clear();
122 }
123 }
124 }
125 finally {
126 reader.close();
127 }
// Write what is left in the buffer.
// NOTE(review): the buffer is emptied inside the loop as soon as it reaches
// maxBufferSize, so this condition looks always true when we get here.
128 if (buffer.size() < maxBufferSize)
129 {
130 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer, true);
131 buffer.clear();
132 }
133 }
134
135//------------------------------------------------------------------------------
136
137
/**
 * Do any pre-processing on a {@link IAtomContainer} meant to be fragmented.
 * NOTE(review): the statements inside the <code>try</code> block (original
 * lines 158, 160, and 162) are not visible in this listing. From the
 * messages logged in the <code>catch</code> block they presumably deal with
 * hydrogen handling and kekulization - confirm against the real source file.
 * What is visible is the handling of a {@link CDKException}: depending on
 * {@link FragmenterParameters#acceptUnsetToSingeBO()} the structure is
 * either rejected or every bond with order
 * <code>IBond.Order.UNSET</code> is changed into a single bond.
 *
 * @param mol the structure to prepare. It may be modified by this method.
 * @param settings the settings controlling the preparation. Its logger is
 * used without checking for <code>null</code>.
 * @param index identifier of the structure used only in log messages.
 * @return <code>false</code> if a {@link CDKException} was caught and
 * the settings do not allow to convert unset bond orders into single bonds,
 * <code>true</code> otherwise.
 */
151 public static boolean prepareMolToFragmentation(IAtomContainer mol,
152 FragmenterParameters settings, int index)
153 {
154 try
155 {
156 if (settings.addExplicitH())
157 {
159 } else {
161 }
163 } catch (CDKException e)
164 {
165 if (!settings.acceptUnsetToSingeBO())
166 {
167 settings.getLogger().log(Level.WARNING,"Some bond order "
168 + "are unset and attempt to kekulize the "
169 + "system has failed "
170 + "for structure " + index + "."
171 + "This hampers use of SMARTS queries, which "
172 + "may very "
173 + "not work as expected. Structure " + index
174 + " will "
175 + "be rejected. You can avoid rejection by using "
176 + "keyword "
177 + ParametersType.FRG_PARAMS.getKeywordRoot()
178 + "UNSETTOSINGLEBO, but you'll "
179 + "still be using a peculiar connectivity "
180 + "table were"
181 + "many bonds are artificially markes as "
182 + "single to "
183 + "avoid use of 'UNSET' bond order. "
184 + "Further details on the problem: "
185 + e.getMessage());
186 return false;
187 } else {
188 settings.getLogger().log(Level.WARNING,"Failed "
189 + "kekulization "
190 + "for structure " + index
191 + " but UNSETTOSINGLEBO "
192 + "keyword used. Forcing use of single bonds to "
193 + "replace bonds with unset order.");
// NOTE(review): a bond returning null from getOrder() would cause a
// NullPointerException in the loop below - confirm it cannot happen.
194 for (IBond bnd : mol.bonds())
195 {
196 if (bnd.getOrder().equals(IBond.Order.UNSET))
197 {
198 bnd.setOrder(IBond.Order.SINGLE);
199 }
200 }
201 }
202 }
203 return true;
204 }
205
206//------------------------------------------------------------------------------
207
219 public static void filterStrucutresBySMARTS(File input, Set<String> smarts,
220 File output, Logger logger)
221 throws DENOPTIMException, IOException
222 {
223 FileInputStream fis = new FileInputStream(input);
224 IteratingSDFReader reader = new IteratingSDFReader(fis,
225 DefaultChemObjectBuilder.getInstance());
226
227 int i = -1;
228 Map<String, String> smartsMap = new HashMap<String, String>();
229 for (String s : smarts)
230 {
231 i++;
232 smartsMap.put("prefilter-"+i, s);
233 }
234
235 int index = -1;
236 int maxBufferSize = 2000;
237 ArrayList<IAtomContainer> buffer = new ArrayList<IAtomContainer>(500);
238 try {
239 while (reader.hasNext())
240 {
241 index++;
242 if (logger!=null)
243 {
244 logger.log(Level.FINE,"Prefiltering structure " + index);
245 }
246 IAtomContainer mol = reader.next();
247
248 ManySMARTSQuery msq = new ManySMARTSQuery(mol, smartsMap);
249 if (msq.hasProblems())
250 {
251 String msg = "WARNING! Problems while searching for "
252 + "specific atoms/bonds using SMARTS: "
253 + msq.getMessage();
254 throw new DENOPTIMException(msg,msq.getProblem());
255 }
256 Map<String, Mappings> allMatches = msq.getAllMatches();
257
258 if (allMatches.size()==0)
259 {
260 buffer.add(mol);
261 } else {
262 String hits = "";
263 for (String s : allMatches.keySet())
264 hits = hits + DenoptimIO.NL + smartsMap.get(s);
265 if (logger!=null)
266 {
267 logger.log(Level.INFO,"Found match for " + hits
268 + "Rejecting structure " + index + ": "
269 + mol.getTitle());
270 }
271 }
272
273 // If max buffer size is reached, then bump to file
274 if (buffer.size() >= maxBufferSize)
275 {
276 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer,
277 true);
278 buffer.clear();
279 }
280 }
281 } finally {
282 reader.close();
283 }
284 if (buffer.size() < maxBufferSize)
285 {
286 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer, true);
287 buffer.clear();
288 }
289 }
290
291//-----------------------------------------------------------------------------
292
/**
 * Fragments every structure obtained from an iterator and processes the
 * resulting fragments.
 * Each structure is chopped with the cutting rules given by
 * {@link FragmenterParameters#getCuttingRules()}. Each resulting fragment
 * gets the property <code>cdk:Title</code> set to a string made of the name
 * of the originating structure and a 0-based fragment counter, and is then
 * passed to
 * {@link #manageFragmentCollection(Vertex, int, FragmenterParameters, List, Logger)}.
 * NOTE(review): the lines creating the iterator (original lines 316-317)
 * and the beginning of the statement that handles the kept fragments
 * (original line 366) are not visible in this listing. Presumably the
 * iterator reads from <code>input</code> and the kept fragments are written
 * to <code>output</code> - confirm against the real source file.
 *
 * @param input the source of the structures to fragment.
 * @param settings the settings providing cutting rules and criteria for
 * the processing of fragments.
 * @param output the destination of the fragments.
 * @param logger where progress is reported. Can be <code>null</code>, in
 * which case this method does not log anything itself.
 * @return <code>false</code> if no fragment was produced or if none of the
 * produced fragments was kept; <code>true</code> otherwise.
 * @throws CDKException
 * @throws IOException
 * @throws DENOPTIMException
 * @throws IllegalArgumentException
 * @throws UndetectedFileFormatException
 */
312 public static boolean fragmentation(File input, FragmenterParameters settings,
313 File output, Logger logger) throws CDKException, IOException,
314 DENOPTIMException, IllegalArgumentException, UndetectedFileFormatException
315 {
318
// Counters cumulated over all the structures processed by this call
319 int totalProd = 0;
320 int totalKept = 0;
321 int index = -1;
322 try {
323 while (iterator.hasNext())
324 {
325 index++;
326 if (logger!=null)
327 {
328 logger.log(Level.FINE,"Fragmenting structure " + index);
329 }
330 IAtomContainer mol = iterator.next();
// Fall back to an index-based name when the structure has no usable title
331 String molName = "noname-mol" + index;
332 if (mol.getTitle()!=null && !mol.getTitle().isBlank())
333 molName = mol.getTitle();
334
335 // Generate the fragments
336 List<Vertex> fragments = fragmentation(mol,
337 settings.getCuttingRules(),
338 logger);
339 if (logger!=null)
340 {
341 logger.log(Level.FINE,"Fragmentation produced "
342 + fragments.size() + " fragments.");
343 }
344 totalProd += fragments.size();
345
346 // Post-fragmentation processing of fragments
347 List<Vertex> keptFragments = new ArrayList<Vertex>();
348 int fragCounter = 0;
349 for (Vertex frag : fragments)
350 {
351 // Add metadata
// NB: the title uses the counter before the increment (0-based), while the
// counter passed on to the management method has already been incremented.
352 String fragIdStr = "From_" + molName + "_" + fragCounter;
353 frag.setProperty("cdk:Title", fragIdStr);
354 fragCounter++;
355 manageFragmentCollection(frag, fragCounter, settings,
356 keptFragments, logger);
357 }
358 if (logger!=null)
359 {
360 logger.log(Level.FINE,"Fragments surviving post-"
361 + "processing: " + keptFragments.size());
362 }
363 totalKept += keptFragments.size();
// NOTE(review): the condition tests the cumulative count, so the statement
// below runs also for a structure with no kept fragments once any earlier
// structure produced some. The first line of that statement (original line
// 366) is missing from this listing.
364 if (!settings.doManageIsomorphicFamilies() && totalKept>0)
365 {
367 keptFragments,true);
368 }
369 }
370 } finally {
371 iterator.close();
372 }
373
374 // Did we actually produce anything? We might not...
375 if (totalProd==0)
376 {
377 if (logger!=null)
378 {
379 logger.log(Level.WARNING,"No fragment produced. Cutting rules "
380 + "were ineffective on the given structures.");
381 }
382 return false;
383 } else if (totalKept==0)
384 {
385 if (logger!=null)
386 {
387 logger.log(Level.WARNING,"No fragment kept out of " + totalProd
388 + " produced fragments. Filtering criteria might be "
389 + "too restrictive.");
390 }
391 return false;
392 }
393 return true;
394 }
395
396//------------------------------------------------------------------------------
397
/**
 * Chops one chemical structure by applying the given cutting rules.
 * A master fragment is built from the structure, the bonds matching the
 * rules are removed from it and replaced by pairs of attachment points
 * sharing the same cut identifier, and finally each group of atoms that is
 * still connected is extracted from a clone of the master fragment.
 * Rules are applied in the order in which they are listed, and a bond that
 * has already been removed by one rule is ignored by the following ones.
 *
 * @param mol the structure to chop.
 * @param rules the cutting rules to apply, in order of priority.
 * @param logger where problems are reported.
 * NOTE(review): the treatment of multi-hapto systems calls this logger
 * without checking for <code>null</code> - confirm whether <code>null</code>
 * is an acceptable value here.
 * @return the list of extracted fragments. Only fragments with at least one
 * attachment point are included, so the list is empty if no bond was cut.
 * @throws DENOPTIMException
 */
406 public static List<Vertex> fragmentation(IAtomContainer mol,
407 List<CuttingRule> rules, Logger logger) throws DENOPTIMException
408 {
409 Fragment masterFrag = new Fragment(mol,BBType.UNDEFINED);
410 IAtomContainer fragsMol = masterFrag.getIAtomContainer();
411
412 // Identify bonds
413 Map<String, List<MatchedBond>> matchingbonds =
414 FragmenterTools.getMatchingBondsAllInOne(fragsMol,rules,logger);
415
416 // Select bonds to cut and what rule to use for cutting them
// The cut identifier is shared by the two attachment points made by one cut
417 int cutId = -1;
418 for (CuttingRule rule : rules) // NB: iterator follows rule's priority
419 {
420 String ruleName = rule.getName();
421
422 // Skip unmatched rules
423 if (!matchingbonds.keySet().contains(ruleName))
424 continue;
425
426 for (MatchedBond tb: matchingbonds.get(ruleName))
427 {
428 IAtom atmA = tb.getAtmSubClass0();
429 IAtom atmB = tb.getAtmSubClass1();
430
431 //ignore if bond already broken
432 if (!fragsMol.getConnectedAtomsList(atmA).contains(atmB))
433 {
434 continue;
435 }
436
437 //treatment of n-hapto ligands
438 if (rule.isHAPTO())
439 {
440 // Get central atom (i.e., the "mono-hapto" side,
441 // typically the metal)
442 // As a convention the central atom has subclass '0'
443 IAtom centralAtm = atmA;
444
445 // Get list of candidates for hapto-system:
446 // they have same cutting Rule and central metal
447 ArrayList<IAtom> candidatesForHapto = new ArrayList<IAtom>();
448 for (MatchedBond tbForHapto : matchingbonds.get(ruleName))
449 {
450 //Consider only bond involving same central atom
451 if (tbForHapto.getAtmSubClass0() == centralAtm)
452 candidatesForHapto.add(tbForHapto.getAtmSubClass1());
453 }
454
455 // Select atoms in n-hapto system: contiguous neighbors with
456 // same type of bond with the same central atom.
// NB: the set created here is replaced by the one returned by
// exploreHapticity, which contains the seed atom as well.
457 Set<IAtom> atmsInHapto = new HashSet<IAtom>();
458 atmsInHapto.add(tb.getAtmSubClass1());
459 atmsInHapto = exploreHapticity(tb.getAtmSubClass1(),
460 centralAtm, candidatesForHapto, fragsMol);
461 if (atmsInHapto.size() == 1)
462 {
463 logger.log(Level.WARNING,"Unable to find more than one "
464 + "bond involved in high-hapticity ligand! "
465 + "Bond ignored.");
466 continue;
467 }
468
469 // Check existence of all bonds involved in multi-hapto system
470 boolean isSystemIntact = true;
471 for (IAtom ligAtm : atmsInHapto)
472 {
473 List<IAtom> nbrsOfLigAtm =
474 fragsMol.getConnectedAtomsList(ligAtm);
475 if (!nbrsOfLigAtm.contains(centralAtm))
476 {
477 isSystemIntact = false;
478 break;
479 }
480 }
481
482 // If not, it means that another rule already acted on the
483 // system thus kill this attempt without generating du-atom
484 if (!isSystemIntact)
485 continue;
486
487 // A dummy atom will be used to define attachment point of
488 // ligand with high hapticity
// The dummy is placed at the average of the coordinates of the atoms in
// the multi-hapto system.
489 Point3d dummyP3d = new Point3d(); //Used also for 2D
490 for (IAtom ligAtm : atmsInHapto)
491 {
492 Point3d ligP3d = MoleculeUtils.getPoint3d(ligAtm);
493 dummyP3d.x = dummyP3d.x + ligP3d.x;
494 dummyP3d.y = dummyP3d.y + ligP3d.y;
495 dummyP3d.z = dummyP3d.z + ligP3d.z;
496 }
497
498 dummyP3d.x = dummyP3d.x / (double) atmsInHapto.size();
499 dummyP3d.y = dummyP3d.y / (double) atmsInHapto.size();
500 dummyP3d.z = dummyP3d.z / (double) atmsInHapto.size();
501
502 //Add Dummy atom to molecular object
503 //if no other Du is already in the same position
// NOTE(review): the condition selecting which atoms are considered in the
// loop below (original lines 507-508) is missing from this listing.
504 IAtom dummyAtm = null;
505 for (IAtom oldDu : fragsMol.atoms())
506 {
509 {
510 Point3d oldDuP3d = oldDu.getPoint3d();
511 if (oldDuP3d.distance(dummyP3d) < 0.002)
512 {
513 dummyAtm = oldDu;
514 break;
515 }
516 }
517 }
518
519 if (dummyAtm==null)
520 {
521 dummyAtm = new PseudoAtom(DENOPTIMConstants.DUMMYATMSYMBOL);
522 dummyAtm.setPoint3d(dummyP3d);
523 fragsMol.addAtom(dummyAtm);
524 }
525
526 // Modify connectivity of atoms involved in high-hapticity
527 // coordination creation of Du-to-ATM bonds
528 // By internal convention the bond order is "SINGLE".
529 IBond.Order border = IBond.Order.valueOf("SINGLE");
530
531 for (IAtom ligAtm : atmsInHapto)
532 {
533 List<IAtom> nbrsOfDu = fragsMol.getConnectedAtomsList(
534 dummyAtm);
535 if (!nbrsOfDu.contains(ligAtm))
536 {
537 // Add bond with dummy
538 Bond bnd = new Bond(dummyAtm,ligAtm,border);
539 fragsMol.addBond(bnd);
540 }
541 // Remove bonds between central and coordinating atoms
542 IBond oldBnd = fragsMol.getBond(centralAtm,ligAtm);
543 fragsMol.removeBond(oldBnd);
544 }
545
546 // NB: by convention the "first" class (i.e., the ???:0 class)
547 // is always on the central atom.
// Each attachment point is given the position of the atom on the other
// side of the cut as third argument.
548 AttachmentPoint apA = masterFrag.addAPOnAtom(centralAtm,
549 rule.getAPClass0(),
550 MoleculeUtils.getPoint3d(dummyAtm));
551 AttachmentPoint apB = masterFrag.addAPOnAtom(dummyAtm,
552 rule.getAPClass1(),
553 MoleculeUtils.getPoint3d(centralAtm));
554
555 cutId++;
556 apA.setCutId(cutId);
557 apB.setCutId(cutId);
558 } else {
559 //treatment of mono-hapto ligands
560 IBond bnd = fragsMol.getBond(atmA,atmB);
561 fragsMol.removeBond(bnd);
562
// NOTE(review): the last argument of each of the two calls below (original
// lines 565 and 568) is missing from this listing.
563 AttachmentPoint apA = masterFrag.addAPOnAtom(atmA,
564 rule.getAPClass0(),
566 AttachmentPoint apB = masterFrag.addAPOnAtom(atmB,
567 rule.getAPClass1(),
569
570 cutId++;
571 apA.setCutId(cutId);
572 apB.setCutId(cutId);
573 } //end of if (hapticity>1)
574 } //end of loop over matching bonds
575 } //end of loop over rules
576
577 // Extract isolated fragments
// For each atom not yet assigned to a fragment, a clone of the master
// fragment is stripped of all the atoms that are not reachable from that
// atom. Atom indexes collected from the clone are compared with indexes of
// the master fragment.
// NOTE(review): this relies on clone() preserving the order of atoms -
// confirm.
578 ArrayList<Vertex> fragments = new ArrayList<Vertex>();
579 Set<Integer> doneAlready = new HashSet<Integer>();
580 for (int idx=0 ; idx<masterFrag.getAtomCount(); idx++)
581 {
582 if (doneAlready.contains(idx))
583 continue;
584
585 Fragment cloneOfMaster = masterFrag.clone();
586 IAtomContainer iac = cloneOfMaster.getIAtomContainer();
587 Set<IAtom> atmsToKeep = exploreConnectivity(iac.getAtom(idx), iac);
588 atmsToKeep.stream().forEach(atm -> doneAlready.add(iac.indexOf(atm)));
589
590 Set<IAtom> atmsToRemove = new HashSet<IAtom>();
591 for (IAtom atm : cloneOfMaster.atoms())
592 {
593 if (!atmsToKeep.contains(atm))
594 {
595 atmsToRemove.add(atm);
596 }
597 }
598 cloneOfMaster.removeAtoms(atmsToRemove);
// Groups of atoms without any attachment point are not reported
599 if (cloneOfMaster.getAttachmentPoints().size()>0)
600 fragments.add(cloneOfMaster);
601 }
602
603 return fragments;
604 }
605
606//------------------------------------------------------------------------------
620 static Set<IAtom> exploreHapticity(IAtom seed, IAtom centralAtom,
621 ArrayList<IAtom> candidates, IAtomContainer mol)
622 {
623 Set<IAtom> atmsInHapto = new HashSet<IAtom>();
624 atmsInHapto.add(seed);
625 ArrayList<IAtom> toVisitAtoms = new ArrayList<IAtom>();
626 toVisitAtoms.add(seed);
627 ArrayList<IAtom> visitedAtoms = new ArrayList<IAtom>();
628 while (toVisitAtoms.size()>0)
629 {
630 ArrayList<IAtom> toVisitLater = new ArrayList<IAtom>();
631 for (IAtom atomInFocus : toVisitAtoms)
632 {
633 if (visitedAtoms.contains(atomInFocus)
634 || atomInFocus==centralAtom)
635 continue;
636 else
637 visitedAtoms.add(atomInFocus);
638
639 if (candidates.contains(atomInFocus))
640 {
641 atmsInHapto.add(atomInFocus);
642 toVisitLater.addAll(mol.getConnectedAtomsList(atomInFocus));
643 }
644 }
645 toVisitAtoms.clear();
646 toVisitAtoms.addAll(toVisitLater);
647 }
648 return atmsInHapto;
649 }
650
651//------------------------------------------------------------------------------
660 static Set<IAtom> exploreConnectivity(IAtom seed, IAtomContainer mol)
661 {
662 Set<IAtom> atmsReachableFromSeed = new HashSet<IAtom>();
663 ArrayList<IAtom> toVisitAtoms = new ArrayList<IAtom>();
664 toVisitAtoms.add(seed);
665 ArrayList<IAtom> visitedAtoms = new ArrayList<IAtom>();
666 while (toVisitAtoms.size()>0)
667 {
668 ArrayList<IAtom> toVisitLater = new ArrayList<IAtom>();
669 for (IAtom atomInFocus : toVisitAtoms)
670 {
671 if (visitedAtoms.contains(atomInFocus))
672 continue;
673 else
674 visitedAtoms.add(atomInFocus);
675
676 atmsReachableFromSeed.add(atomInFocus);
677 toVisitLater.addAll(mol.getConnectedAtomsList(atomInFocus));
678 }
679 toVisitAtoms.clear();
680 toVisitAtoms.addAll(toVisitLater);
681 }
682 return atmsReachableFromSeed;
683 }
684
685//-----------------------------------------------------------------------------
686
695 static Map<String, List<MatchedBond>> getMatchingBondsAllInOne(
696 IAtomContainer mol, List<CuttingRule> rules, Logger logger)
697 {
698 // Collect all SMARTS queries
699 Map<String,String> smarts = new HashMap<String,String>();
700 for (CuttingRule rule : rules)
701 {
702 smarts.put(rule.getName(),rule.getWholeSMARTSRule());
703 }
704
705 // Prepare a data structure for the return value
706 Map<String, List<MatchedBond>> bondsMatchingRules =
707 new HashMap<String, List<MatchedBond>>();
708
709 // Get all the matches to the SMARTS queries
710 ManySMARTSQuery msq = new ManySMARTSQuery(mol, smarts);
711 if (msq.hasProblems())
712 {
713 if (logger!=null)
714 {
715 logger.log(Level.WARNING, "Problem matching SMARTS: "
716 + msq.getMessage());
717 }
718 return bondsMatchingRules;
719 }
720
721 for (CuttingRule rule : rules)
722 {
723 String ruleName = rule.getName();
724
725 if (msq.getNumMatchesOfQuery(ruleName) == 0)
726 {
727 continue;
728 }
729
730 // Get atoms matching cutting rule queries
731 Mappings purgedPairs = msq.getMatchesOfSMARTS(ruleName);
732
733 // Evaluate subclass membership and eventually store target bonds
734 ArrayList<MatchedBond> bondsMatched = new ArrayList<MatchedBond>();
735 for (int[] pair : purgedPairs)
736 {
737 if (pair.length!=2)
738 {
739 throw new Error("Cutting rule: " + ruleName
740 + " has identified " + pair.length + " atoms "
741 + "instead of 2. Modify rule to make it find a "
742 + "pair of atoms.");
743 }
744 MatchedBond tb = new MatchedBond(mol.getAtom(pair[0]),
745 mol.getAtom(pair[1]), rule);
746
747 // Apply any further option of the cutting rule
748 if (tb.satisfiesRuleOptions(logger))
749 bondsMatched.add(tb);
750 }
751
752 if (!bondsMatched.isEmpty())
753 bondsMatchingRules.put(ruleName, bondsMatched);
754 }
755
756 return bondsMatchingRules;
757 }
758
759//------------------------------------------------------------------------------
760
/**
 * Processes every structure found in an SDF file as a fragment: each
 * structure is converted into a {@link Fragment} of type
 * <code>BBType.UNDEFINED</code> and passed to
 * {@link #manageFragmentCollection(Vertex, int, FragmenterParameters, List, Logger)},
 * which decides whether the fragment is added to the buffer of collected
 * fragments.
 * NOTE(review): the first line of each of the two statements handling the
 * content of the buffer (original lines 793 and 803) is not visible in this
 * listing. Presumably they write the buffer to <code>output</code> - confirm
 * against the real source file.
 *
 * @param input the SDF file from which fragments are read.
 * @param settings the settings controlling the processing of fragments.
 * @param output the destination of the collected fragments.
 * @param logger where progress is reported. Can be <code>null</code>, in
 * which case this method does not log anything itself.
 * @throws DENOPTIMException
 * @throws IOException
 * @throws IllegalArgumentException
 * @throws UndetectedFileFormatException
 */
766 public static void manageFragmentCollection(File input,
767 FragmenterParameters settings,
768 File output, Logger logger) throws DENOPTIMException, IOException,
769 IllegalArgumentException, UndetectedFileFormatException
770 {
771 FileInputStream fis = new FileInputStream(input);
772 IteratingSDFReader reader = new IteratingSDFReader(fis,
773 DefaultChemObjectBuilder.getInstance());
774
// 0-based position of the fragment in the input; also used as counter in
// the call processing the single fragment.
775 int index = -1;
776 int maxBufferSize = 2000;
777 ArrayList<Vertex> buffer = new ArrayList<Vertex>(500);
778 try {
779 while (reader.hasNext())
780 {
781 index++;
782 if (logger!=null)
783 {
784 logger.log(Level.FINE,"Processing fragment " + index);
785 }
786 Vertex frag = new Fragment(reader.next(), BBType.UNDEFINED);
787 manageFragmentCollection(frag, index, settings,
788 buffer, logger);
789
790 // If max buffer size is reached, then bump to file
791 if (buffer.size() >= maxBufferSize)
792 {
794 buffer, true);
795 buffer.clear();
796 }
797 }
798 } finally {
799 reader.close();
800 }
// Deal with what is left in the buffer.
// NOTE(review): the buffer is emptied inside the loop as soon as it reaches
// maxBufferSize, so this condition looks always true when we get here.
801 if (buffer.size() < maxBufferSize)
802 {
804 buffer, true);
805 buffer.clear();
806 }
807 }
808
809//------------------------------------------------------------------------------
810
/**
 * Management of fragments: includes application of fragment filters,
 * comparison with the lists of ignorable and of target fragments, and,
 * if requested by the settings, management of families of isomorphic
 * fragments. A fragment that survives all these steps is added to the
 * given collector. In the management of isomorphic families the fragment
 * is also written to files chosen according to the molecular weight slot
 * of the fragment; this part runs in a block synchronized on
 * <code>settings.MANAGEMWSLOTSSLOCK</code>.
 * NOTE(review): several lines of this method (original lines 832, 873, 876,
 * 901, 912, 923, and 952) are not visible in this listing; see the notes in
 * the body.
 *
 * @param frag the fragment to process. It is cast to {@link Fragment}.
 * @param fragCounter identifier of the fragment used only in log messages.
 * @param settings the settings controlling the processing.
 * @param collector where the fragment is added if it is to be kept.
 * @param logger where rejections are reported. The checks done in this
 * method skip logging if this is <code>null</code>.
 * NOTE(review): the logger is also passed on to the filtering method,
 * which uses it without checking for <code>null</code>.
 * @throws DENOPTIMException
 * @throws IllegalArgumentException
 */
828 public static void manageFragmentCollection(Vertex frag, int fragCounter,
829 FragmenterParameters settings,
830 List<Vertex> collector, Logger logger)
831 throws DENOPTIMException, IllegalArgumentException,
833 {
834
835 if (!filterFragment((Fragment) frag, settings, logger))
836 {
837 return;
838 }
839
840 //Compare with list of fragments to ignore
841 if (settings.getIgnorableFragments().size() > 0)
842 {
843 if (settings.getIgnorableFragments().stream()
844 .anyMatch(ignorable -> ((Fragment)frag)
845 .isIsomorphicTo(ignorable)))
846 {
847 if (logger!=null)
848 {
849 logger.log(Level.FINE,"Fragment " + fragCounter
850 + " is ignorable.");
851 }
852 return;
853 }
854 }
855
856 //Compare with list of fragments to retain
// NB: the parameter of the lambda below is named 'ignorable', but here it
// iterates over the target fragments, i.e., those to retain.
857 if (settings.getTargetFragments().size() > 0)
858 {
859 if (!settings.getTargetFragments().stream()
860 .anyMatch(ignorable -> ((Fragment)frag)
861 .isIsomorphicTo(ignorable)))
862 {
863 if (logger!=null)
864 {
865 logger.log(Level.FINE,"Fragment " + fragCounter
866 + " doesn't match any target: rejected.");
867 }
868 return;
869 }
870 }
871
872 // Add dummy atoms on linearities
// NOTE(review): the first part of the condition (original line 873) and the
// first line of the statement in the block (original line 876) are missing
// from this listing.
874 && settings.doAddDuOnLinearity())
875 {
877 settings.getLinearAngleLimit());
878 }
879
880 // Management of duplicate fragments:
881 // -> identify duplicates (isomorphic fragments),
882 // -> keep one (or more, if we want to sample the isomorphs),
883 // -> reject the rest.
884 if (settings.doManageIsomorphicFamilies())
885 {
886 synchronized (settings.MANAGEMWSLOTSSLOCK)
887 {
888 String mwSlotID = getMWSlotIdentifier(frag,
889 settings.getMWSlotSize());
890
// Two files for each molecular weight slot: one for the unique fragments
// and one for all the collected members of the isomorphic families.
891 File mwFileUnq = settings.getMWSlotFileNameUnqFrags(
892 mwSlotID);
893 File mwFileAll = settings.getMWSlotFileNameAllFrags(
894 mwSlotID);
895
896 // Compare this fragment with previously seen ones
// NOTE(review): the expression that initializes 'knownFrags' (original line
// 901) is missing from this listing; presumably it reads the fragments from
// the file of unique fragments - confirm.
897 Vertex unqVersion = null;
898 if (mwFileUnq.exists())
899 {
900 ArrayList<Vertex> knownFrags =
902 unqVersion = knownFrags.stream()
903 .filter(knownFrag ->
904 ((Fragment)frag).isIsomorphicTo(knownFrag))
905 .findAny()
906 .orElse(null);
907 }
908 if (unqVersion!=null)
909 {
910 // Identify this unique fragment
// NOTE(review): the argument of getProperty (original line 912) is missing
// from this listing.
911 String isoFamID = unqVersion.getProperty(
913 .toString();
914
915 // Do we already have enough isomorphic family members
916 // for this fragment?
// NOTE(review): unboxing into int fails with a NullPointerException if the
// family identifier has no entry in the map - confirm it cannot happen.
917 int sampleSize = settings.getIsomorphsCount()
918 .get(isoFamID);
919 if (sampleSize < settings.getIsomorphicSampleSize())
920 {
921 // Add this isomorphic version to the sample
// NOTE(review): the name of the property being set (original line 923) is
// missing from this listing.
922 frag.setProperty(
924 isoFamID);
925 settings.getIsomorphsCount().put(isoFamID,
926 sampleSize+1);
927 DenoptimIO.writeVertexToFile(mwFileAll,
928 FileFormat.VRTXSDF, frag, true);
929 collector.add(frag);
930 } else {
931 // This would be inefficient in the long run
932 // because it by-passes the splitting by MW.
933 // Do not do it!
934 /*
935 if (logger!=null)
936 {
937 logger.log(Level.FINE,"Fragment "
938 + fragCounter
939 + " is isomorphic to unique fragment "
940 + unqVersionID + ", but we already "
941 + "have a sample of " + sampleSize
942 + ": ignoring this fragment from now "
943 + "on.");
944 }
945 settings.getIgnorableFragments().add(frag);
946 */
947 }
948 } else {
949 // This is a never-seen fragment
// NOTE(review): the name of the property being set (original line 952) is
// missing from this listing.
950 String isoFamID = settings.newIsomorphicFamilyID();
951 frag.setProperty(
953 isoFamID);
954 settings.getIsomorphsCount().put(isoFamID, 1);
955 DenoptimIO.writeVertexToFile(mwFileUnq,
956 FileFormat.VRTXSDF, frag, true);
957 DenoptimIO.writeVertexToFile(mwFileAll,
958 FileFormat.VRTXSDF, frag, true);
959 collector.add(frag);
960 }
961 } // end synchronized block
962 } else {
963 //If we are here, we did not ask to remove duplicates
964 collector.add(frag);
965 }
966 }
967
968//------------------------------------------------------------------------------
969
979 public static boolean filterFragment(Fragment frag,
980 FragmenterParameters settings)
981 {
982 return filterFragment(frag, settings, settings.getLogger());
983 }
984
985//------------------------------------------------------------------------------
986
/**
 * Filter fragments according to the criteria defined in the settings.
 * The criteria are evaluated in the order in which they appear in the body
 * of this method. The first criterion that is not satisfied makes this
 * method log the reason at level FINE and return <code>false</code>.
 * The first check, i.e., the rejection of fragments containing atoms that
 * are neither elements nor dummy atoms, is always done. All the other
 * checks are done only if requested by the settings.
 * NOTE(review): three lines of this method (original lines 1199, 1225, and
 * 1247) are not visible in this listing; see the notes in the body.
 *
 * @param frag the fragment to analyze.
 * @param settings the settings defining the filtering criteria.
 * @param logger where the reason for a rejection is reported.
 * NOTE(review): it is used without checking for <code>null</code>, so a
 * <code>null</code> value causes a NullPointerException as soon as there is
 * anything to report.
 * @return <code>true</code> if the fragment satisfies all the criteria,
 * <code>false</code> if it should be rejected.
 */
998 public static boolean filterFragment(Fragment frag,
999 FragmenterParameters settings, Logger logger)
1000 {
1001 // Default filtering criteria: get rid of R/*/X/Xx
1002 for (IAtom atm : frag.atoms())
1003 {
1004 if (MoleculeUtils.isElement(atm))
1005 {
1006 continue;
1007 }
// Dummy atoms are the only non-elements that are tolerated
1008 String smb = MoleculeUtils.getSymbolOrLabel(atm);
1009 if (DENOPTIMConstants.DUMMYATMSYMBOL.equals(smb))
1010 {
1011 continue;
1012 }
1013 logger.log(Level.FINE,"Removing fragment contains non-element '"
1014 + smb + "'");
1015 return false;
1016 }
1017
1018 if (settings.isWorkingIn3D())
1019 {
1020 // Incomplete 3D fragmentation: an atom has the same coords of an AP.
1021 for (AttachmentPoint ap : frag.getAttachmentPoints())
1022 {
1023 Point3d ap3d = ap.getDirectionVector();
1024 if (ap3d!=null)
1025 {
1026 for (IAtom atm : frag.atoms())
1027 {
1028 Point3d atm3d = MoleculeUtils.getPoint3d(atm);
1029 double dist = ap3d.distance(atm3d);
// NB: the number printed right after "AP" in the message below is the index
// of the atom, not an index of the attachment point.
1030 if (dist < 0.0002)
1031 {
1032 logger.log(Level.FINE,"Removing fragment with AP"
1033 + frag.getIAtomContainer().indexOf(atm)
1034 + " and atom " + MoleculeUtils.getSymbolOrLabel(atm)
1035 + " coincide.");
1036 return false;
1037 }
1038 }
1039 }
1040 }
1041 }
1042 if (settings.doRejectWeirdIsotopes())
1043 {
1044 for (IAtom atm : frag.atoms())
1045 {
1046 if (MoleculeUtils.isElement(atm))
1047 {
1048 // Unconfigured isotope has null mass number
1049 if (atm.getMassNumber() == null)
1050 continue;
1051
1052 String symb = MoleculeUtils.getSymbolOrLabel(atm);
1053 int a = atm.getMassNumber();
// Any failure in retrieving or using the major isotope is only logged and
// does not lead to rejection of the fragment.
1054 try {
1055 IIsotope major = Isotopes.getInstance().getMajorIsotope(symb);
1056 if (a != major.getMassNumber())
1057 {
1058 logger.log(Level.FINE,"Removing fragment containing "
1059 + "isotope "+symb+a+".");
1060 return false;
1061 }
1062 } catch (Throwable t) {
1063 logger.log(Level.WARNING,"Not able to perform Isotope"
1064 + "detection.");
1065 }
1066 }
1067
1068 }
1069 }
1070
1071 // User-controlled filtering criteria
1072
1073 if (settings.getRejectedElements().size() > 0)
1074 {
1075 for (IAtom atm : frag.atoms())
1076 {
1077 String symb = MoleculeUtils.getSymbolOrLabel(atm);
1078 if (settings.getRejectedElements().contains(symb))
1079 {
1080 logger.log(Level.FINE,"Removing fragment containing '"
1081 + symb + "'.");
1082 return false;
1083 }
1084 }
1085 }
1086
1087 if (settings.getRejectedFormulaLessThan().size() > 0
1088 || settings.getRejectedFormulaMoreThan().size() > 0)
1089 {
1090 Map<String,Double> eaMol = FormulaUtils.getElementalanalysis(
1091 frag.getIAtomContainer());
1092
// The more-than criteria are a collection of formulae: exceeding the amount
// of any element in any of them is sufficient for rejection.
1093 for (Map<String,Double> criterion :
1094 settings.getRejectedFormulaMoreThan())
1095 {
1096 for (String el : criterion.keySet())
1097 {
1098 if (eaMol.containsKey(el))
1099 {
1100 // -0.5 to make it strictly less-than
1101 if (eaMol.get(el) - criterion.get(el) > 0.5)
1102 {
1103 logger.log(Level.FINE,"Removing fragment that "
1104 + "contains too much '" + el + "' "
1105 + "as requested by formula"
1106 + "-based (more-than) settings (" + el
1107 + eaMol.get(el) + " > " + criterion + ").");
1108 return false;
1109 }
1110 }
1111 }
1112 }
1113
// The less-than criterion is one single formula: the fragment is rejected
// if any element of that formula is absent or present in a lower amount.
1114 Map<String,Double> criterion = settings.getRejectedFormulaLessThan();
1115 for (String el : criterion.keySet())
1116 {
1117 if (!eaMol.containsKey(el))
1118 {
1119 logger.log(Level.FINE,"Removing fragment that does not "
1120 + "contain '" + el + "' as requested by formula"
1121 + "-based (less-than) settings.");
1122 return false;
1123 } else {
1124 // 0.5 to make it strictly more-than
1125 if (eaMol.get(el) - criterion.get(el) < -0.5)
1126 {
1127 logger.log(Level.FINE,"Removing fragment that "
1128 + "contains too little '" + el + "' "
1129 + "as requested by formula"
1130 + "-based settings (" + el
1131 + eaMol.get(el) + " < " + criterion + ").");
1132 return false;
1133 }
1134 }
1135 }
1136
1137 }
1138
// Rejected APClasses are given as strings that are compared with the
// beginning of the string representation of each APClass.
1139 if (settings.getRejectedAPClasses().size() > 0)
1140 {
1141 for (APClass apc : frag.getAllAPClasses())
1142 {
1143 for (String s : settings.getRejectedAPClasses())
1144 {
1145 if (apc.toString().startsWith(s))
1146 {
1147 logger.log(Level.FINE,"Removing fragment with APClass "
1148 + apc);
1149 return false;
1150 }
1151 }
1152 }
1153 }
1154
// A combination leads to rejection only if each of its conditions is
// satisfied by at least one APClass of the fragment.
1155 if (settings.getRejectedAPClassCombinations().size() > 0)
1156 {
1157 loopOverCombinations:
1158 for (String[] conditions : settings.getRejectedAPClassCombinations())
1159 {
1160 for (int ip=0; ip<conditions.length; ip++)
1161 {
1162 String condition = conditions[ip];
1163 boolean found = false;
1164 for (APClass apc : frag.getAllAPClasses())
1165 {
1166 if (apc.toString().startsWith(condition))
1167 {
1168 found = true;
// NB: this 'continue' only moves on to the next APClass: the loop does not
// stop at the first match.
1169 continue;
1170 }
1171 }
1172 if (!found)
1173 continue loopOverCombinations;
1174 // Here we do have at least one AP satisfying the condition.
1175 }
1176 // Here we managed to satisfy all conditions. Therefore, we can
1177 // reject this fragment
1178
1179 String allCondsAsString = "";
1180 for (int i=0; i<conditions.length; i++)
1181 allCondsAsString = allCondsAsString + " " + conditions[i];
1182
1183 logger.log(Level.FINE,"Removing fragment with combination of "
1184 + "APClasses matching '" + allCondsAsString + "'.");
1185 return false;
1186 }
1187 }
1188
// Limits on the number of heavy atoms are active only if greater than zero
1189 if (settings.getMaxFragHeavyAtomCount()>0
1190 || settings.getMinFragHeavyAtomCount()>0)
1191 {
1192 int totHeavyAtm = 0;
1193 for (IAtom atm : frag.atoms())
1194 {
1195 if (MoleculeUtils.isElement(atm))
1196 {
// NOTE(review): the end of the condition below (original line 1199) is
// missing from this listing.
1197 String symb = MoleculeUtils.getSymbolOrLabel(atm);
1198 if ((!symb.equals("H")) && (!symb.equals(
1200 totHeavyAtm++;
1201 }
1202 }
// NOTE(review): the message below prints '<' although the rejection is
// triggered by a count that is greater than the maximum.
1203 if (settings.getMaxFragHeavyAtomCount() > 0
1204 && totHeavyAtm > settings.getMaxFragHeavyAtomCount())
1205 {
1206 logger.log(Level.FINE,"Removing fragment with too many atoms ("
1207 + totHeavyAtm + " < "
1208 + settings.getMaxFragHeavyAtomCount()
1209 + ")");
1210 return false;
1211 }
1212 if (settings.getMinFragHeavyAtomCount() > 0
1213 && totHeavyAtm < settings.getMinFragHeavyAtomCount())
1214 {
1215 logger.log(Level.FINE,"Removing fragment with too few atoms ("
1216 + totHeavyAtm + " < "
1217 + settings.getMinFragHeavyAtomCount()
1218 + ")");
1219 return false;
1220 }
1221 }
1222
// Any match of any rejection SMARTS leads to rejection of the fragment.
// NOTE(review): the line declaring 'msq' (original line 1225) is missing
// from this listing. Problems in the evaluation of SMARTS are only logged.
1223 if (settings.getFragRejectionSMARTS().size() > 0)
1224 {
1226 settings.getFragRejectionSMARTS());
1227 if (msq.hasProblems())
1228 {
1229 logger.log(Level.WARNING,"Problems evaluating SMARTS-based "
1230 + "rejection criteria. " + msq.getMessage());
1231 }
1232
1233 for (String criterion : settings.getFragRejectionSMARTS().keySet())
1234 {
1235 if (msq.getNumMatchesOfQuery(criterion)>0)
1236 {
1237 logger.log(Level.FINE,"Removing fragment that matches "
1238 + "SMARTS-based rejection criteria '" + criterion
1239 + "'.");
1240 return false;
1241 }
1242 }
1243 }
1244
// The fragment is retained only if it matches at least one of the
// retention SMARTS.
// NOTE(review): the line declaring 'msq' (original line 1247) is missing
// from this listing. Also, the warning below mentions "rejection criteria"
// although it concerns the retention criteria.
1245 if (settings.getFragRetentionSMARTS().size() > 0)
1246 {
1248 settings.getFragRetentionSMARTS());
1249 if (msq.hasProblems())
1250 {
1251 logger.log(Level.WARNING,"Problems evaluating SMARTS-based "
1252 + "rejection criteria. " + msq.getMessage());
1253 }
1254
1255 boolean matchesAny = false;
1256 for (String criterion : settings.getFragRetentionSMARTS().keySet())
1257 {
1258 if (msq.getNumMatchesOfQuery(criterion) > 0)
1259 {
1260 matchesAny = true;
1261 break;
1262 }
1263 }
1264 if (!matchesAny)
1265 {
1266 logger.log(Level.FINE,"Removing fragment that does not "
1267 + "match any SMARTS-based retention criteria.");
1268 return false;
1269 }
1270 }
1271 return true;
1272 }
1273
1274//------------------------------------------------------------------------------
1275
1283 public static String getMWSlotIdentifier(Vertex frag, int slotSize)
1284 {
1285 for (IAtom a : frag.getIAtomContainer().atoms())
1286 {
1287 if (a.getImplicitHydrogenCount()==null)
1288 a.setImplicitHydrogenCount(0);
1289 }
1290 double mw = AtomContainerManipulator.getMass(frag.getIAtomContainer());
1291 int slotNum = (int) (mw / (Double.valueOf(slotSize)));
1292 return slotNum*slotSize + "-" + (slotNum+1)*slotSize;
1293 }
1294
1295//------------------------------------------------------------------------------
1296
1297 public static Vertex getRCPForAP(AttachmentPoint ap, APClass rcvApClass)
1298 throws DENOPTIMException
1299 {
1300 IAtomContainer mol = SilentChemObjectBuilder.getInstance()
1301 .newAtomContainer();
1302 Point3d apv = ap.getDirectionVector();
1303 mol.addAtom(new PseudoAtom(RingClosingAttractor.RCALABELPERAPCLASS.get(rcvApClass),
1304 new Point3d(
1305 Double.valueOf(apv.x),
1306 Double.valueOf(apv.y),
1307 Double.valueOf(apv.z))));
1308
1309 Fragment rcv = new Fragment(mol, BBType.FRAGMENT);
1310
1311 Point3d aps = MoleculeUtils.getPoint3d(
1312 ap.getOwner().getIAtomContainer().getAtom(
1313 ap.getAtomPositionNumber()));
1314 rcv.addAP(0, rcvApClass, new Point3d(
1315 Double.valueOf(aps.x),
1316 Double.valueOf(aps.y),
1317 Double.valueOf(aps.z)));
1318 return rcv;
1319 }
1320
1321//------------------------------------------------------------------------------
1322
1323}
General set of constants used in DENOPTIM.
static final Object FORMULASTR
Property name used to store molecular formula as string in an atom container.
static final String DUMMYATMSYMBOL
Symbol of dummy atom.
static final Object ISOMORPHICFAMILYID
Property used to store the identifier of the family of isomorphic fragments that owns a fragment.
Exception thrown when the format of a file is not recognized.
static Map< String, List< MatchedBond > > getMatchingBondsAllInOne(IAtomContainer mol, List< CuttingRule > rules, Logger logger)
Identification of the bonds matching a list of SMARTS queries.
static void checkElementalAnalysisAgainstFormula(File input, File output, Logger logger)
Processes all molecules analyzing the composition of the structure in the chemical representation as ...
static List< Vertex > fragmentation(IAtomContainer mol, List< CuttingRule > rules, Logger logger)
Chops one chemical structure by applying the given cutting rules.
static boolean filterFragment(Fragment frag, FragmenterParameters settings)
Filter fragments according to the criteria defined in the settings.
static Set< IAtom > exploreConnectivity(IAtom seed, IAtomContainer mol)
Explores the connectivity annotating which atoms have been visited.
static Vertex getRCPForAP(AttachmentPoint ap, APClass rcvApClass)
static boolean prepareMolToFragmentation(IAtomContainer mol, FragmenterParameters settings, int index)
Do any pre-processing on an IAtomContainer meant to be fragmented.
static void manageFragmentCollection(Vertex frag, int fragCounter, FragmenterParameters settings, List< Vertex > collector, Logger logger)
Management of fragments: includes application of fragment filters, rejection rules,...
static Set< IAtom > exploreHapticity(IAtom seed, IAtom centralAtom, ArrayList< IAtom > candidates, IAtomContainer mol)
Identifies non-central atoms involved in the same n-hapto ligand as the seed atom.
static boolean filterFragment(Fragment frag, FragmenterParameters settings, Logger logger)
Filter fragments according to the criteria defined in the settings.
static void filterStrucutresBySMARTS(File input, Set< String > smarts, File output, Logger logger)
Removes any structure that matches any of the given SMARTS queries.
static String getMWSlotIdentifier(Vertex frag, int slotSize)
Determines the name of the MW slot to use when comparing the given fragment with previously stored fr...
static void manageFragmentCollection(File input, FragmenterParameters settings, File output, Logger logger)
Management of fragments: includes application of fragment filters, rejection rules,...
static boolean fragmentation(File input, FragmenterParameters settings, File output, Logger logger)
Performs fragmentation according to the given cutting rules.
An attachment point (AP) is a possibility to attach a Vertex onto the vertex holding the AP (i....
Class representing a continuously connected portion of chemical object holding attachment points.
Definition: Fragment.java:61
void addAP(int atomPositionNumber)
Adds an attachment point with a dummy APClass.
Definition: Fragment.java:343
AttachmentPoint addAPOnAtom(IAtom srcAtm, APClass apc, Point3d vector)
Add an attachment point to the specified atom.
Definition: Fragment.java:424
List< AttachmentPoint > getAttachmentPoints()
Definition: Fragment.java:1120
Fragment clone()
Returns a deep copy of this fragment.
Definition: Fragment.java:733
Iterable< IAtom > atoms()
Definition: Fragment.java:822
IAtomContainer getIAtomContainer()
Definition: Fragment.java:788
void removeAtoms(Collection< IAtom > atoms)
Removes a list of atoms and updates the list of attachment points.
Definition: Fragment.java:913
A vertex is a data structure that has an identity and holds a list of AttachmentPoints.
Definition: Vertex.java:61
ArrayList< APClass > getAllAPClasses()
Returns the list of all APClasses present on this vertex.
Definition: Vertex.java:720
Object getProperty(Object property)
Definition: Vertex.java:1136
abstract IAtomContainer getIAtomContainer()
void setProperty(Object key, Object property)
Definition: Vertex.java:1148
The RingClosingAttractor represents the available valence/connection that allows closing a ring.
static final HashMap< APClass, String > RCALABELPERAPCLASS
Conventional labels for attractor pseudoatom.
Utility methods for input/output.
static File writeVertexesToFile(File file, FileFormat format, List< Vertex > vertexes)
Writes vertexes to file.
static void writeSDFFile(String fileName, IAtomContainer mol)
Writes IAtomContainer to SDF file.
static File writeVertexToFile(File file, FileFormat format, Vertex vertex, boolean append)
Writes vertexes to file.
static ArrayList< Vertex > readVertexes(File file, Vertex.BBType bbt)
Reads Vertexes from any file that can contain such items.
An iterator that takes IAtomContainers from a file, possibly using an available iterating reader,...
void close()
Close the memory-efficient iterator if any is open.
Logger getLogger()
Get the name of the program specific logger.
A cutting rule with three SMARTS queries (atom 1, bond, atom2) and options.
Parameters controlling execution of the fragmenter.
boolean doRejectWeirdIsotopes
Flag requesting to reject fragments with minor isotopes.
boolean addExplicitH
Flag requesting to add explicit H atoms.
Boolean satisfiesRuleOptions
Flag indicating that we have checked the additional option from the cutting rule (otherwise this flag...
Tool to add/remove dummy atoms from linearities or multi-hapto sites.
static void addDummiesOnLinearities(Fragment frag, double angLim)
Append dummy atoms on otherwise linear arrangements of atoms.
Utilities for manipulating molecular formulas.
static boolean compareFormulaAndElementalAnalysis(String formula, IAtomContainer mol)
Compares the molecular formula formatted as from the Cambridge Structural Database (CSD) against the ...
static Map< String, Double > getElementalanalysis(IAtomContainer mol)
Treats Deuterium as a different element than Hydrogen.
Container of lists of atoms matching a list of SMARTS.
Map< String, Mappings > getAllMatches()
int getNumMatchesOfQuery(String query)
Utilities for molecule conversion.
static void setZeroImplicitHydrogensToAllAtoms(IAtomContainer iac)
Sets zero implicit hydrogen count to all atoms.
static int getDimensions(IAtomContainer mol)
Determines the dimensionality of the given chemical object.
static String getSymbolOrLabel(IAtom atm)
Gets either the elemental symbol (for standard atoms) or the label (for pseudo-atoms).
static void ensureNoUnsetBondOrders(IAtomContainer iac)
Sets bond order = single to all otherwise unset bonds.
static void explicitHydrogens(IAtomContainer mol)
Converts all the implicit hydrogens to explicit.
static Point3d getPoint3d(IAtom atm)
Return the 3D coordinates, if present.
static boolean isElement(IAtom atom)
Check element symbol corresponds to real element of Periodic Table.
File formats identified by DENOPTIM.
Definition: FileFormat.java:32
The type of building block.
Definition: Vertex.java:86
FRG_PARAMS
Parameters controlling the fragmenter.