$darkmode
DENOPTIM
FragmenterTools.java
Go to the documentation of this file.
1package denoptim.fragmenter;
2
3import java.io.File;
4import java.io.FileInputStream;
5import java.io.IOException;
6import java.util.ArrayList;
7import java.util.HashMap;
8import java.util.HashSet;
9import java.util.List;
10import java.util.Map;
11import java.util.Set;
12import java.util.logging.Level;
13import java.util.logging.Logger;
14
15import javax.vecmath.Point3d;
16
17import org.openscience.cdk.Bond;
18import org.openscience.cdk.DefaultChemObjectBuilder;
19import org.openscience.cdk.PseudoAtom;
20import org.openscience.cdk.config.Isotopes;
21import org.openscience.cdk.exception.CDKException;
22import org.openscience.cdk.interfaces.IAtom;
23import org.openscience.cdk.interfaces.IAtomContainer;
24import org.openscience.cdk.interfaces.IBond;
25import org.openscience.cdk.interfaces.IIsotope;
26import org.openscience.cdk.io.iterator.IteratingSDFReader;
27import org.openscience.cdk.isomorphism.Mappings;
28import org.openscience.cdk.silent.SilentChemObjectBuilder;
29import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
30
31import denoptim.constants.DENOPTIMConstants;
32import denoptim.exception.DENOPTIMException;
33import denoptim.files.FileFormat;
34import denoptim.files.UndetectedFileFormatException;
35import denoptim.graph.APClass;
36import denoptim.graph.AttachmentPoint;
37import denoptim.graph.Fragment;
38import denoptim.graph.Vertex;
39import denoptim.graph.Vertex.BBType;
40import denoptim.graph.rings.RingClosingAttractor;
41import denoptim.io.DenoptimIO;
42import denoptim.io.IteratingAtomContainerReader;
43import denoptim.programs.RunTimeParameters.ParametersType;
44import denoptim.programs.fragmenter.CuttingRule;
45import denoptim.programs.fragmenter.FragmenterParameters;
46import denoptim.programs.fragmenter.MatchedBond;
47import denoptim.utils.DummyAtomHandler;
48import denoptim.utils.FormulaUtils;
49import denoptim.utils.ManySMARTSQuery;
50import denoptim.utils.MoleculeUtils;
51
52public class FragmenterTools
53{
54
55//------------------------------------------------------------------------------
56
/**
 * Reads the structures found in an SDF file and writes to the output file
 * only those that pass a consistency check between the molecular formula
 * stored in the {@link DENOPTIMConstants#FORMULASTR} property and the
 * structure itself. Rejected structures are reported in the log (when a
 * logger is given) and are not written.
 *
 * @param input the SDF file to read structures from.
 * @param output the file where retained structures are written, in batches.
 * @param logger where to report progress and rejections; may be
 * <code>null</code>, in which case nothing is logged.
 * @throws DENOPTIMException declared by this method; not thrown by any
 * statement visible in this listing.
 * @throws IOException if the input cannot be opened or the reader cannot be
 * closed.
 */
71 public static void checkElementalAnalysisAgainstFormula(File input,
72 File output, Logger logger)
73 throws DENOPTIMException, IOException
74 {
75 FileInputStream fis = new FileInputStream(input);
76 IteratingSDFReader reader = new IteratingSDFReader(fis,
77 DefaultChemObjectBuilder.getInstance());
78
// 'index' is the 0-based position of the structure in the input file and
// is used only in messages.
79 int index = -1;
80 int maxBufferSize = 2000;
81 ArrayList<IAtomContainer> buffer = new ArrayList<IAtomContainer>(500);
82 try {
83 while (reader.hasNext())
84 {
85 index++;
86 if (logger!=null)
87 {
88 logger.log(Level.FINE,"Checking elemental analysis of "
89 + "structure " + index);
90 }
91 IAtomContainer mol = reader.next();
// A structure without the formula property aborts the whole run with an
// unchecked Error rather than being skipped.
92 if (mol.getProperty(DENOPTIMConstants.FORMULASTR)==null)
93 {
94 throw new Error("Property '" + DENOPTIMConstants.FORMULASTR
95 + "' not found in molecule " + index + " in file "
96 + input + ". Cannot compare formula with elemental"
97 + "analysis.");
98 }
99 String formula = mol.getProperty(DENOPTIMConstants.FORMULASTR)
100 .toString();
101
// NOTE(review): listing line 102 (the head of this 'if' condition) is
// absent from this extract; presumably it calls a FormulaUtils comparison
// of 'formula' against 'mol' - confirm against the original file.
103 mol, logger))
104 {
105 buffer.add(mol);
106 } else {
107 if (logger!=null)
108 {
109 logger.log(Level.INFO,"Inconsistency between elemental "
110 + "analysis of structure and molecular formula."
111 + " Rejecting structure " + index + ": "
112 + mol.getTitle());
113 }
114 }
115
116 // If max buffer size is reached, then bump to file
117 if (buffer.size() >= maxBufferSize)
118 {
119 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer,
120 true);
121 buffer.clear();
122 }
123 }
124 }
125 finally {
126 reader.close();
127 }
// Final flush of whatever is left in the buffer. NOTE(review): the buffer
// is emptied inside the loop whenever it reaches maxBufferSize, so this
// condition looks always true here, including for an empty buffer.
128 if (buffer.size() < maxBufferSize)
129 {
130 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer, true);
131 buffer.clear();
132 }
133 }
134
135//------------------------------------------------------------------------------
136
137
/**
 * Does any pre-processing on a {@link IAtomContainer} meant to be
 * fragmented, and tells whether the structure can be used.
 * The visible part of this method deals with the failure of the
 * pre-processing: when a {@link CDKException} reports that the Kekulé
 * structure cannot be assigned, the structure is either rejected or all its
 * bonds with unset order are turned into single bonds, depending on
 * {@link FragmenterParameters#acceptUnsetToSingeBO()}.
 *
 * @param mol the structure to prepare. Its bond orders may be modified.
 * @param settings the settings controlling the pre-processing and providing
 * the logger.
 * @param index identifier of the structure, used only in log messages.
 * @return <code>false</code> if the structure should be rejected,
 * <code>true</code> otherwise.
 */
151 public static boolean prepareMolToFragmentation(IAtomContainer mol,
152 FragmenterParameters settings, int index)
153 {
154 try
155 {
// NOTE(review): listing lines 158, 160 and 162 (the bodies of the two
// branches and whatever follows them) are absent from this extract, so
// the actual pre-processing calls are not visible here.
156 if (settings.addExplicitH())
157 {
159 } else {
161 }
163 } catch (CDKException e)
164 {
// NOTE(review): a CDKException with any other message is silently ignored
// and the method still returns true - confirm this is intended.
165 if (e.getMessage().contains("Cannot assign Kekulé structure"))
166 {
167 if (!settings.acceptUnsetToSingeBO())
168 {
169 settings.getLogger().log(Level.WARNING,"Some bond order "
170 + "are unset and attempt to kekulize the "
171 + "system has failed "
172 + "for structure " + index + ". "
173 + "This hampers use of SMARTS queries, which "
174 + "may very well "
175 + "not work as expected. Structure " + index
176 + " will be rejected. "
177 + "You can avoid rejection by using "
178 + "keyword "
179 + ParametersType.FRG_PARAMS.getKeywordRoot()
180 + "UNSETTOSINGLEBO, but you'll "
181 + "still be using a peculiar connectivity "
182 + "table were "
183 + "many bonds are artificially marked as "
184 + "single to "
185 + "avoid use of 'UNSET' bond order. "
186 + "Further details on the problem: "
187 + e.getMessage());
188 return false;
189 } else {
190 settings.getLogger().log(Level.WARNING,"Failed "
191 + "kekulization "
192 + "for structure " + index
193 + " but UNSETTOSINGLEBO "
194 + "keyword used. Forcing use of single bonds to "
195 + "replace bonds with unset order.");
// Fallback requested by the user: every bond still flagged as UNSET
// becomes a single bond.
196 for (IBond bnd : mol.bonds())
197 {
198 if (bnd.getOrder().equals(IBond.Order.UNSET))
199 {
200 bnd.setOrder(IBond.Order.SINGLE);
201 }
202 }
203 }
204 }
205 }
206 return true;
207 }
208
209//------------------------------------------------------------------------------
210
222 public static void filterStrucutresBySMARTS(File input, Set<String> smarts,
223 File output, Logger logger)
224 throws DENOPTIMException, IOException
225 {
226 FileInputStream fis = new FileInputStream(input);
227 IteratingSDFReader reader = new IteratingSDFReader(fis,
228 DefaultChemObjectBuilder.getInstance());
229
230 int i = -1;
231 Map<String, String> smartsMap = new HashMap<String, String>();
232 for (String s : smarts)
233 {
234 i++;
235 smartsMap.put("prefilter-"+i, s);
236 }
237
238 int index = -1;
239 int maxBufferSize = 2000;
240 ArrayList<IAtomContainer> buffer = new ArrayList<IAtomContainer>(500);
241 try {
242 while (reader.hasNext())
243 {
244 index++;
245 if (logger!=null)
246 {
247 logger.log(Level.FINE,"Prefiltering structure " + index);
248 }
249 IAtomContainer mol = reader.next();
250
251 ManySMARTSQuery msq = new ManySMARTSQuery(mol, smartsMap);
252 if (msq.hasProblems())
253 {
254 String msg = "WARNING! Problems while searching for "
255 + "specific atoms/bonds using SMARTS: "
256 + msq.getMessage();
257 throw new DENOPTIMException(msg,msq.getProblem());
258 }
259 Map<String, Mappings> allMatches = msq.getAllMatches();
260
261 if (allMatches.size()==0)
262 {
263 buffer.add(mol);
264 } else {
265 String hits = "";
266 for (String s : allMatches.keySet())
267 hits = hits + DenoptimIO.NL + smartsMap.get(s);
268 if (logger!=null)
269 {
270 logger.log(Level.INFO,"Found match for " + hits
271 + "Rejecting structure " + index + ": "
272 + mol.getTitle());
273 }
274 }
275
276 // If max buffer size is reached, then bump to file
277 if (buffer.size() >= maxBufferSize)
278 {
279 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer,
280 true);
281 buffer.clear();
282 }
283 }
284 } finally {
285 reader.close();
286 }
287 if (buffer.size() < maxBufferSize)
288 {
289 DenoptimIO.writeSDFFile(output.getAbsolutePath(), buffer, true);
290 buffer.clear();
291 }
292 }
293
294//-----------------------------------------------------------------------------
295
/**
 * Fragments all the structures provided by an iterator over the input and
 * post-processes the resulting fragments with
 * {@link #manageFragmentCollection(Vertex, int, FragmenterParameters, List, Logger)}.
 * Each fragment gets a <code>cdk:Title</code> property of the form
 * <code>From_[molecule name]_[fragment counter]</code>.
 *
 * @param input the file with the structures to fragment.
 * @param settings the settings providing cutting rules and filtering
 * criteria.
 * @param output the file meant to collect the resulting fragments.
 * @param logger where to report progress; may be <code>null</code>.
 * @return <code>false</code> if no fragment was produced, or if none of the
 * produced fragments survived the post-processing; <code>true</code>
 * otherwise.
 * @throws CDKException
 * @throws IOException
 * @throws DENOPTIMException
 * @throws IllegalArgumentException
 * @throws UndetectedFileFormatException
 */
315 public static boolean fragmentation(File input, FragmenterParameters settings,
316 File output, Logger logger) throws CDKException, IOException,
317 DENOPTIMException, IllegalArgumentException, UndetectedFileFormatException
318 {
// NOTE(review): listing lines 319-320 are absent from this extract;
// presumably they declare 'iterator', the reader over the structures in
// 'input' that is used below - confirm against the original file.
321
322 int totalProd = 0;
323 int totalKept = 0;
324 int index = -1;
325 try {
326 while (iterator.hasNext())
327 {
328 index++;
329 if (logger!=null)
330 {
331 logger.log(Level.FINE,"Fragmenting structure " + index);
332 }
333 IAtomContainer mol = iterator.next();
// Structures without a usable title get a name based on their position
334 String molName = "noname-mol" + index;
335 if (mol.getTitle()!=null && !mol.getTitle().isBlank())
336 molName = mol.getTitle();
337
338 // Generate the fragments
339 List<Vertex> fragments = fragmentation(mol,
340 settings.getCuttingRules(),
341 logger);
342 if (logger!=null)
343 {
344 logger.log(Level.FINE,"Fragmentation produced "
345 + fragments.size() + " fragments.");
346 }
347 totalProd += fragments.size();
348
349 // Post-fragmentation processing of fragments
350 List<Vertex> keptFragments = new ArrayList<Vertex>();
351 int fragCounter = 0;
352 for (Vertex frag : fragments)
353 {
354 // Add metadata
355 String fragIdStr = "From_" + molName + "_" + fragCounter;
356 frag.setProperty("cdk:Title", fragIdStr);
// The counter is incremented before being passed on: the title above is
// 0-based while the value used in the messages of the callee is 1-based.
357 fragCounter++;
358 manageFragmentCollection(frag, fragCounter, settings,
359 keptFragments, logger);
360 }
361 if (logger!=null)
362 {
363 logger.log(Level.FINE,"Fragments surviving post-"
364 + "processing: " + keptFragments.size());
365 }
366 totalKept += keptFragments.size();
// NOTE(review): the test is on the cumulative 'totalKept', not on the
// fragments kept for this structure, so 'keptFragments' may be empty here.
367 if (!settings.doManageIsomorphicFamilies() && totalKept>0)
368 {
// NOTE(review): listing line 369 (the head of this call) is absent from
// this extract; presumably it writes 'keptFragments' to 'output' in
// append mode - confirm against the original file.
370 keptFragments,true);
371 }
372 }
373 } finally {
374 iterator.close();
375 }
376
377 // Did we actually produce anything? We might not...
378 if (totalProd==0)
379 {
380 if (logger!=null)
381 {
382 logger.log(Level.WARNING,"No fragment produced. Cutting rules "
383 + "were ineffective on the given structures.");
384 }
385 return false;
386 } else if (totalKept==0)
387 {
388 if (logger!=null)
389 {
390 logger.log(Level.WARNING,"No fragment kept out of " + totalProd
391 + " produced fragments. Filtering criteria might be "
392 + "too restrictive.");
393 }
394 return false;
395 }
396 return true;
397 }
398
399//------------------------------------------------------------------------------
400
/**
 * Chops one chemical structure by applying the given cutting rules.
 * The bonds matched by the rules are removed from a {@link Fragment} built
 * from the given structure, a pair of attachment points sharing the same
 * cut identifier is added for each cut, and finally each group of atoms
 * that is still connected is extracted as a separate fragment. Groups
 * without any attachment point are not returned.
 *
 * @param mol the structure to chop.
 * @param rules the cutting rules, iterated in the order of the list.
 * @param logger where to report problems. NOTE(review): unlike elsewhere in
 * this class, the logger is used here without a <code>null</code> check.
 * @return the list of fragments.
 * @throws DENOPTIMException
 */
409 public static List<Vertex> fragmentation(IAtomContainer mol,
410 List<CuttingRule> rules, Logger logger) throws DENOPTIMException
411 {
412 Fragment masterFrag = new Fragment(mol,BBType.UNDEFINED);
413 IAtomContainer fragsMol = masterFrag.getIAtomContainer();
414
415 // Identify bonds
416 Map<String, List<MatchedBond>> matchingbonds =
417 FragmenterTools.getMatchingBondsAllInOne(fragsMol,rules,logger);
418
419 // Select bonds to cut and what rule to use for cutting them
420 int cutId = -1;
421 for (CuttingRule rule : rules) // NB: iterator follows rule's priority
422 {
423 String ruleName = rule.getName();
424
425 // Skip unmatched rules
426 if (!matchingbonds.keySet().contains(ruleName))
427 continue;
428
429 for (MatchedBond tb: matchingbonds.get(ruleName))
430 {
431 IAtom atmA = tb.getAtmSubClass0();
432 IAtom atmB = tb.getAtmSubClass1();
433
434 //ignore if bond already broken
435 if (!fragsMol.getConnectedAtomsList(atmA).contains(atmB))
436 {
437 continue;
438 }
439
440 //treatment of n-hapto ligands
441 if (rule.isHAPTO())
442 {
443 // Get central atom (i.e., the "mono-hapto" side,
444 // typically the metal)
445 // As a convention the central atom has subclass '0'
446 IAtom centralAtm = atmA;
447
448 // Get list of candidates for hapto-system:
449 // they have same cutting Rule and central metal
450 ArrayList<IAtom> candidatesForHapto = new ArrayList<IAtom>();
451 for (MatchedBond tbForHapto : matchingbonds.get(ruleName))
452 {
453 //Consider only bond involving same central atom
454 if (tbForHapto.getAtmSubClass0() == centralAtm)
455 candidatesForHapto.add(tbForHapto.getAtmSubClass1());
456 }
457
458 // Select atoms in n-hapto system: contiguous neighbors with
459 // same type of bond with the same central atom.
// NOTE(review): the set created and filled in the next two statements is
// immediately replaced by the result of exploreHapticity.
460 Set<IAtom> atmsInHapto = new HashSet<IAtom>();
461 atmsInHapto.add(tb.getAtmSubClass1());
462 atmsInHapto = exploreHapticity(tb.getAtmSubClass1(),
463 centralAtm, candidatesForHapto, fragsMol);
464 if (atmsInHapto.size() == 1)
465 {
466 logger.log(Level.WARNING,"Unable to find more than one "
467 + "bond involved in high-hapticity ligand! "
468 + "Bond ignored.");
469 continue;
470 }
471
472 // Check existence of all bonds involved in multi-hapto system
473 boolean isSystemIntact = true;
474 for (IAtom ligAtm : atmsInHapto)
475 {
476 List<IAtom> nbrsOfLigAtm =
477 fragsMol.getConnectedAtomsList(ligAtm);
478 if (!nbrsOfLigAtm.contains(centralAtm))
479 {
480 isSystemIntact = false;
481 break;
482 }
483 }
484
485 // If not, it means that another rule already acted on the
486 // system thus kill this attempt without generating du-atom
487 if (!isSystemIntact)
488 continue;
489
490 // A dummy atom will be used to define attachment point of
491 // ligand with high hapticity
// The dummy is placed at the arithmetic mean of the coordinates of the
// atoms in the hapto system.
492 Point3d dummyP3d = new Point3d(); //Used also for 2D
493 for (IAtom ligAtm : atmsInHapto)
494 {
495 Point3d ligP3d = MoleculeUtils.getPoint3d(ligAtm);
496 dummyP3d.x = dummyP3d.x + ligP3d.x;
497 dummyP3d.y = dummyP3d.y + ligP3d.y;
498 dummyP3d.z = dummyP3d.z + ligP3d.z;
499 }
500
501 dummyP3d.x = dummyP3d.x / (double) atmsInHapto.size();
502 dummyP3d.y = dummyP3d.y / (double) atmsInHapto.size();
503 dummyP3d.z = dummyP3d.z / (double) atmsInHapto.size();
504
505 //Add Dummy atom to molecular object
506 //if no other Du is already in the same position
507 IAtom dummyAtm = null;
508 for (IAtom oldDu : fragsMol.atoms())
509 {
// NOTE(review): listing lines 510-511 (the condition opening the next
// block) are absent from this extract; presumably they select only the
// atoms that are dummy atoms - confirm against the original file.
512 {
513 Point3d oldDuP3d = oldDu.getPoint3d();
514 if (oldDuP3d.distance(dummyP3d) < 0.002)
515 {
516 dummyAtm = oldDu;
517 break;
518 }
519 }
520 }
521
522 if (dummyAtm==null)
523 {
524 dummyAtm = new PseudoAtom(DENOPTIMConstants.DUMMYATMSYMBOL);
525 dummyAtm.setPoint3d(dummyP3d);
526 fragsMol.addAtom(dummyAtm);
527 }
528
529 // Modify connectivity of atoms involved in high-hapticity
530 // coordination creation of Du-to-ATM bonds
531 // By internal convention the bond order is "SINGLE".
532 IBond.Order border = IBond.Order.valueOf("SINGLE");
533
534 for (IAtom ligAtm : atmsInHapto)
535 {
536 List<IAtom> nbrsOfDu = fragsMol.getConnectedAtomsList(
537 dummyAtm);
538 if (!nbrsOfDu.contains(ligAtm))
539 {
540 // Add bond with dummy
541 Bond bnd = new Bond(dummyAtm,ligAtm,border);
542 fragsMol.addBond(bnd);
543 }
544 // Remove bonds between central and coordinating atoms
545 IBond oldBnd = fragsMol.getBond(centralAtm,ligAtm);
546 fragsMol.removeBond(oldBnd);
547 }
548
549 // NB: by convention the "first" class (i.e., the ???:0 class)
550 // is always on the central atom.
551 AttachmentPoint apA = masterFrag.addAPOnAtom(centralAtm,
552 rule.getAPClass0(),
553 MoleculeUtils.getPoint3d(dummyAtm));
554 AttachmentPoint apB = masterFrag.addAPOnAtom(dummyAtm,
555 rule.getAPClass1(),
556 MoleculeUtils.getPoint3d(centralAtm));
557
// Both attachment points created by one cut share the same cut identifier
558 cutId++;
559 apA.setCutId(cutId);
560 apB.setCutId(cutId);
561 } else {
562 //treatment of mono-hapto ligands
563 IBond bnd = fragsMol.getBond(atmA,atmB);
564 fragsMol.removeBond(bnd);
565
// NOTE(review): listing lines 568 and 571 (the last argument of each of
// the next two calls) are absent from this extract.
566 AttachmentPoint apA = masterFrag.addAPOnAtom(atmA,
567 rule.getAPClass0(),
569 AttachmentPoint apB = masterFrag.addAPOnAtom(atmB,
570 rule.getAPClass1(),
572
573 cutId++;
574 apA.setCutId(cutId);
575 apB.setCutId(cutId);
576 } //end of if (hapticity>1)
577 } //end of loop over matching bonds
578 } //end of loop over rules
579
580 // Extract isolated fragments
// For each atom not yet assigned to a fragment, a clone of the master
// fragment is stripped of every atom not reachable from that atom.
581 ArrayList<Vertex> fragments = new ArrayList<Vertex>();
582 Set<Integer> doneAlready = new HashSet<Integer>();
583 for (int idx=0 ; idx<masterFrag.getAtomCount(); idx++)
584 {
585 if (doneAlready.contains(idx))
586 continue;
587
588 Fragment cloneOfMaster = masterFrag.clone();
589 IAtomContainer iac = cloneOfMaster.getIAtomContainer();
// NOTE(review): atom indexes taken from the clone are used to skip atoms
// of the master; this assumes cloning preserves the atom order - confirm.
590 Set<IAtom> atmsToKeep = exploreConnectivity(iac.getAtom(idx), iac);
591 atmsToKeep.stream().forEach(atm -> doneAlready.add(iac.indexOf(atm)));
592
593 Set<IAtom> atmsToRemove = new HashSet<IAtom>();
594 for (IAtom atm : cloneOfMaster.atoms())
595 {
596 if (!atmsToKeep.contains(atm))
597 {
598 atmsToRemove.add(atm);
599 }
600 }
601 cloneOfMaster.removeAtoms(atmsToRemove);
// Groups of atoms without attachment points are discarded
602 if (cloneOfMaster.getAttachmentPoints().size()>0)
603 fragments.add(cloneOfMaster);
604 }
605
606 return fragments;
607 }
608
609//------------------------------------------------------------------------------
623 static Set<IAtom> exploreHapticity(IAtom seed, IAtom centralAtom,
624 ArrayList<IAtom> candidates, IAtomContainer mol)
625 {
626 Set<IAtom> atmsInHapto = new HashSet<IAtom>();
627 atmsInHapto.add(seed);
628 ArrayList<IAtom> toVisitAtoms = new ArrayList<IAtom>();
629 toVisitAtoms.add(seed);
630 ArrayList<IAtom> visitedAtoms = new ArrayList<IAtom>();
631 while (toVisitAtoms.size()>0)
632 {
633 ArrayList<IAtom> toVisitLater = new ArrayList<IAtom>();
634 for (IAtom atomInFocus : toVisitAtoms)
635 {
636 if (visitedAtoms.contains(atomInFocus)
637 || atomInFocus==centralAtom)
638 continue;
639 else
640 visitedAtoms.add(atomInFocus);
641
642 if (candidates.contains(atomInFocus))
643 {
644 atmsInHapto.add(atomInFocus);
645 toVisitLater.addAll(mol.getConnectedAtomsList(atomInFocus));
646 }
647 }
648 toVisitAtoms.clear();
649 toVisitAtoms.addAll(toVisitLater);
650 }
651 return atmsInHapto;
652 }
653
654//------------------------------------------------------------------------------
663 static Set<IAtom> exploreConnectivity(IAtom seed, IAtomContainer mol)
664 {
665 Set<IAtom> atmsReachableFromSeed = new HashSet<IAtom>();
666 ArrayList<IAtom> toVisitAtoms = new ArrayList<IAtom>();
667 toVisitAtoms.add(seed);
668 ArrayList<IAtom> visitedAtoms = new ArrayList<IAtom>();
669 while (toVisitAtoms.size()>0)
670 {
671 ArrayList<IAtom> toVisitLater = new ArrayList<IAtom>();
672 for (IAtom atomInFocus : toVisitAtoms)
673 {
674 if (visitedAtoms.contains(atomInFocus))
675 continue;
676 else
677 visitedAtoms.add(atomInFocus);
678
679 atmsReachableFromSeed.add(atomInFocus);
680 toVisitLater.addAll(mol.getConnectedAtomsList(atomInFocus));
681 }
682 toVisitAtoms.clear();
683 toVisitAtoms.addAll(toVisitLater);
684 }
685 return atmsReachableFromSeed;
686 }
687
688//-----------------------------------------------------------------------------
689
698 static Map<String, List<MatchedBond>> getMatchingBondsAllInOne(
699 IAtomContainer mol, List<CuttingRule> rules, Logger logger)
700 {
701 // Collect all SMARTS queries
702 Map<String,String> smarts = new HashMap<String,String>();
703 for (CuttingRule rule : rules)
704 {
705 smarts.put(rule.getName(),rule.getWholeSMARTSRule());
706 }
707
708 // Prepare a data structure for the return value
709 Map<String, List<MatchedBond>> bondsMatchingRules =
710 new HashMap<String, List<MatchedBond>>();
711
712 // Get all the matches to the SMARTS queries
713 ManySMARTSQuery msq = new ManySMARTSQuery(mol, smarts);
714 if (msq.hasProblems())
715 {
716 if (logger!=null)
717 {
718 logger.log(Level.WARNING, "Problem matching SMARTS: "
719 + msq.getMessage());
720 }
721 return bondsMatchingRules;
722 }
723
724 for (CuttingRule rule : rules)
725 {
726 String ruleName = rule.getName();
727
728 if (msq.getNumMatchesOfQuery(ruleName) == 0)
729 {
730 continue;
731 }
732
733 // Get atoms matching cutting rule queries
734 Mappings purgedPairs = msq.getMatchesOfSMARTS(ruleName);
735
736 // Evaluate subclass membership and eventually store target bonds
737 ArrayList<MatchedBond> bondsMatched = new ArrayList<MatchedBond>();
738 for (int[] pair : purgedPairs)
739 {
740 if (pair.length!=2)
741 {
742 throw new Error("Cutting rule: " + ruleName
743 + " has identified " + pair.length + " atoms "
744 + "instead of 2. Modify rule to make it find a "
745 + "pair of atoms.");
746 }
747 MatchedBond tb = new MatchedBond(mol.getAtom(pair[0]),
748 mol.getAtom(pair[1]), rule);
749
750 // Apply any further option of the cutting rule
751 if (tb.satisfiesRuleOptions(logger))
752 bondsMatched.add(tb);
753 }
754
755 if (!bondsMatched.isEmpty())
756 bondsMatchingRules.put(ruleName, bondsMatched);
757 }
758
759 return bondsMatchingRules;
760 }
761
762//------------------------------------------------------------------------------
763
/**
 * Reads the fragments found in an SDF file and processes each of them with
 * {@link #manageFragmentCollection(Vertex, int, FragmenterParameters, List, Logger)}.
 * The fragments that survive are collected in a buffer that is handed over
 * to a writing call in batches.
 *
 * @param input the SDF file to read fragments from.
 * @param settings the settings defining how fragments are managed.
 * @param output the file meant to collect the retained fragments.
 * @param logger where to report progress; may be <code>null</code> as far
 * as this method is concerned.
 * @throws DENOPTIMException
 * @throws IOException if the input cannot be opened or the reader cannot be
 * closed.
 * @throws IllegalArgumentException
 * @throws UndetectedFileFormatException
 */
769 public static void manageFragmentCollection(File input,
770 FragmenterParameters settings,
771 File output, Logger logger) throws DENOPTIMException, IOException,
772 IllegalArgumentException, UndetectedFileFormatException
773 {
774 FileInputStream fis = new FileInputStream(input);
775 IteratingSDFReader reader = new IteratingSDFReader(fis,
776 DefaultChemObjectBuilder.getInstance());
777
// 'index' is the 0-based position of the fragment in the input file
778 int index = -1;
779 int maxBufferSize = 2000;
780 ArrayList<Vertex> buffer = new ArrayList<Vertex>(500);
781 try {
782 while (reader.hasNext())
783 {
784 index++;
785 if (logger!=null)
786 {
787 logger.log(Level.FINE,"Processing fragment " + index);
788 }
789 Vertex frag = new Fragment(reader.next(), BBType.UNDEFINED);
790 manageFragmentCollection(frag, index, settings,
791 buffer, logger);
792
793 // If max buffer size is reached, then bump to file
794 if (buffer.size() >= maxBufferSize)
795 {
// NOTE(review): listing line 796 (the head of this call) is absent from
// this extract; presumably it writes the buffer to 'output' - confirm
// against the original file.
797 buffer, true);
798 buffer.clear();
799 }
800 }
801 } finally {
802 reader.close();
803 }
// Final flush of whatever is left in the buffer. NOTE(review): the buffer
// is emptied inside the loop whenever it reaches maxBufferSize, so this
// condition looks always true here, including for an empty buffer.
804 if (buffer.size() < maxBufferSize)
805 {
// NOTE(review): listing line 806 (the head of this call) is absent from
// this extract.
807 buffer, true);
808 buffer.clear();
809 }
810 }
811
812//------------------------------------------------------------------------------
813
/**
 * Management of fragments: includes application of fragment filters,
 * rejection rules, comparison with the lists of ignorable and target
 * fragments, and, if requested by the settings, the management of families
 * of isomorphic fragments. A fragment that passes all the steps is added to
 * the collector; otherwise the method returns without touching the
 * collector.
 *
 * @param frag the fragment to process. It is cast to {@link Fragment}.
 * @param fragCounter identifier of the fragment, used only in log messages.
 * @param settings the settings defining filters, lists of known fragments,
 * and the handling of isomorphic families.
 * @param collector where the retained fragment is added.
 * @param logger where to report the outcome. It is passed on to
 * {@link #filterFragment(Fragment, FragmenterParameters, Logger)}.
 * @throws DENOPTIMException
 * @throws IllegalArgumentException
 */
831 public static void manageFragmentCollection(Vertex frag, int fragCounter,
832 FragmenterParameters settings,
833 List<Vertex> collector, Logger logger)
834 throws DENOPTIMException, IllegalArgumentException,
// NOTE(review): listing line 835 (the last exception of the throws clause)
// is absent from this extract.
836 {
837
838 if (!filterFragment((Fragment) frag, settings, logger))
839 {
840 return;
841 }
842
843 //Compare with list of fragments to ignore
844 if (settings.getIgnorableFragments().size() > 0)
845 {
846 if (settings.getIgnorableFragments().stream()
847 .anyMatch(ignorable -> ((Fragment)frag)
848 .isIsomorphicTo(ignorable)))
849 {
850 if (logger!=null)
851 {
852 logger.log(Level.FINE,"Fragment " + fragCounter
853 + " is ignorable.");
854 }
855 return;
856 }
857 }
858
859 //Compare with list of fragments to retain
// NB: the lambda parameter below is named 'ignorable' but it iterates over
// the target fragments.
860 if (settings.getTargetFragments().size() > 0)
861 {
862 if (!settings.getTargetFragments().stream()
863 .anyMatch(ignorable -> ((Fragment)frag)
864 .isIsomorphicTo(ignorable)))
865 {
866 if (logger!=null)
867 {
868 logger.log(Level.FINE,"Fragment " + fragCounter
869 + " doesn't match any target: rejected.");
870 }
871 return;
872 }
873 }
874
875 // Add dummy atoms on linearities
// NOTE(review): listing lines 876 and 879 (the head of the condition and
// the head of the call in its body) are absent from this extract.
877 && settings.doAddDuOnLinearity())
878 {
880 settings.getLinearAngleLimit());
881 }
882
883 // Management of duplicate fragments:
884 // -> identify duplicates (isomorphic fragments),
885 // -> keep one (or more, if we want to sample the isomorphs),
886 // -> reject the rest.
887 if (settings.doManageIsomorphicFamilies())
888 {
// The whole look-up-and-write sequence on the MW slot files runs while
// holding the lock object provided by the settings.
889 synchronized (settings.MANAGEMWSLOTSSLOCK)
890 {
891 String mwSlotID = getMWSlotIdentifier(frag,
892 settings.getMWSlotSize());
893
894 File mwFileUnq = settings.getMWSlotFileNameUnqFrags(
895 mwSlotID);
896 File mwFileAll = settings.getMWSlotFileNameAllFrags(
897 mwSlotID);
898
899 // Compare this fragment with previously seen ones
900 Vertex unqVersion = null;
901 if (mwFileUnq.exists())
902 {
// NOTE(review): listing line 904 (the expression producing 'knownFrags')
// is absent from this extract; presumably it reads the fragments stored
// in 'mwFileUnq' - confirm against the original file.
903 ArrayList<Vertex> knownFrags =
905 unqVersion = knownFrags.stream()
906 .filter(knownFrag ->
907 ((Fragment)frag).isIsomorphicTo(knownFrag))
908 .findAny()
909 .orElse(null);
910 }
911 if (unqVersion!=null)
912 {
913 // Identify this unique fragment
// NOTE(review): listing line 915 (the property key) is absent from this
// extract.
914 String isoFamID = unqVersion.getProperty(
916 .toString();
917
918 // Do we already have enough isomorphic family members
919 // for this fragment?
// NOTE(review): the value is unboxed to int, so a family identifier that
// is missing from the map would cause a NullPointerException here.
920 int sampleSize = settings.getIsomorphsCount()
921 .get(isoFamID);
922 if (sampleSize < settings.getIsomorphicSampleSize())
923 {
924 // Add this isomorphic version to the sample
// NOTE(review): listing line 926 (the property key) is absent from this
// extract.
925 frag.setProperty(
927 isoFamID);
928 settings.getIsomorphsCount().put(isoFamID,
929 sampleSize+1);
930 DenoptimIO.writeVertexToFile(mwFileAll,
931 FileFormat.VRTXSDF, frag, true);
932 collector.add(frag);
933 } else {
934 // This would be inefficient in the long run
935 // because it by-passes the splitting by MW.
936 // Do not do it!
937 /*
938 if (logger!=null)
939 {
940 logger.log(Level.FINE,"Fragment "
941 + fragCounter
942 + " is isomorphic to unique fragment "
943 + unqVersionID + ", but we already "
944 + "have a sample of " + sampleSize
945 + ": ignoring this fragment from now "
946 + "on.");
947 }
948 settings.getIgnorableFragments().add(frag);
949 */
950 }
951 } else {
952 // This is a never-seen fragment
953 String isoFamID = settings.newIsomorphicFamilyID();
// NOTE(review): listing line 955 (the property key) is absent from this
// extract.
954 frag.setProperty(
956 isoFamID);
957 settings.getIsomorphsCount().put(isoFamID, 1);
958 DenoptimIO.writeVertexToFile(mwFileUnq,
959 FileFormat.VRTXSDF, frag, true);
960 DenoptimIO.writeVertexToFile(mwFileAll,
961 FileFormat.VRTXSDF, frag, true);
962 collector.add(frag);
963 }
964 } // end synchronized block
965 } else {
966 //If we are here, we did not ask to remove duplicates
967 collector.add(frag);
968 }
969 }
970
971//------------------------------------------------------------------------------
972
982 public static boolean filterFragment(Fragment frag,
983 FragmenterParameters settings)
984 {
985 return filterFragment(frag, settings, settings.getLogger());
986 }
987
988//------------------------------------------------------------------------------
989
/**
 * Filter fragments according to the criteria defined in the settings.
 * The criteria are evaluated in sequence and the first one that is violated
 * causes the rejection of the fragment, which is reported in the log with
 * level {@link Level#FINE}.
 *
 * @param frag the fragment to analyze.
 * @param settings the settings defining the filtering criteria.
 * @param logger where to report the reason for rejecting the fragment.
 * NOTE(review): it is used without a <code>null</code> check.
 * @return <code>true</code> if the fragment passes all the filters,
 * <code>false</code> if it should be rejected.
 */
1001 public static boolean filterFragment(Fragment frag,
1002 FragmenterParameters settings, Logger logger)
1003 {
1004 // Default filtering criteria: get rid of R/*/X/Xx
// Any atom that is neither an element nor a dummy atom causes rejection.
1005 for (IAtom atm : frag.atoms())
1006 {
1007 if (MoleculeUtils.isElement(atm))
1008 {
1009 continue;
1010 }
1011 String smb = MoleculeUtils.getSymbolOrLabel(atm);
1012 if (DENOPTIMConstants.DUMMYATMSYMBOL.equals(smb))
1013 {
1014 continue;
1015 }
1016 logger.log(Level.FINE,"Removing fragment contains non-element '"
1017 + smb + "'");
1018 return false;
1019 }
1020
1021 if (settings.isWorkingIn3D())
1022 {
1023 // Incomplete 3D fragmentation: an atom has the same coords of an AP.
1024 for (AttachmentPoint ap : frag.getAttachmentPoints())
1025 {
1026 Point3d ap3d = ap.getDirectionVector();
1027 if (ap3d!=null)
1028 {
1029 for (IAtom atm : frag.atoms())
1030 {
1031 Point3d atm3d = MoleculeUtils.getPoint3d(atm);
1032 double dist = ap3d.distance(atm3d);
1033 if (dist < 0.0002)
1034 {
// NB: the number reported right after "AP" is the index of the atom.
1035 logger.log(Level.FINE,"Removing fragment with AP"
1036 + frag.getIAtomContainer().indexOf(atm)
1037 + " and atom " + MoleculeUtils.getSymbolOrLabel(atm)
1038 + " coincide.");
1039 return false;
1040 }
1041 }
1042 }
1043 }
1044 }
1045 if (settings.doRejectWeirdIsotopes())
1046 {
1047 for (IAtom atm : frag.atoms())
1048 {
1049 if (MoleculeUtils.isElement(atm))
1050 {
1051 // Unconfigured isotope has null mass number
1052 if (atm.getMassNumber() == null)
1053 continue;
1054
1055 String symb = MoleculeUtils.getSymbolOrLabel(atm);
1056 int a = atm.getMassNumber();
// Best-effort check: any failure in retrieving the major isotope is only
// logged and the atom is then considered acceptable.
1057 try {
1058 IIsotope major = Isotopes.getInstance().getMajorIsotope(symb);
1059 if (a != major.getMassNumber())
1060 {
1061 logger.log(Level.FINE,"Removing fragment containing "
1062 + "isotope "+symb+a+".");
1063 return false;
1064 }
1065 } catch (Throwable t) {
1066 logger.log(Level.WARNING,"Not able to perform Isotope"
1067 + "detection.");
1068 }
1069 }
1070
1071 }
1072 }
1073
1074 // User-controlled filtering criteria
1075
1076 if (settings.getRejectedElements().size() > 0)
1077 {
1078 for (IAtom atm : frag.atoms())
1079 {
1080 String symb = MoleculeUtils.getSymbolOrLabel(atm);
1081 if (settings.getRejectedElements().contains(symb))
1082 {
1083 logger.log(Level.FINE,"Removing fragment containing '"
1084 + symb + "'.");
1085 return false;
1086 }
1087 }
1088 }
1089
1090 if (settings.getRejectedFormulaLessThan().size() > 0
1091 || settings.getRejectedFormulaMoreThan().size() > 0)
1092 {
1093 Map<String,Double> eaMol = FormulaUtils.getElementalanalysis(
1094 frag.getIAtomContainer());
1095
// Upper bounds: there may be several criteria, and exceeding any count of
// any of them causes rejection. Elements absent from the fragment are
// ignored.
1096 for (Map<String,Double> criterion :
1097 settings.getRejectedFormulaMoreThan())
1098 {
1099 for (String el : criterion.keySet())
1100 {
1101 if (eaMol.containsKey(el))
1102 {
1103 // -0.5 to make it strictly less-than
1104 if (eaMol.get(el) - criterion.get(el) > 0.5)
1105 {
1106 logger.log(Level.FINE,"Removing fragment that "
1107 + "contains too much '" + el + "' "
1108 + "as requested by formula"
1109 + "-based (more-than) settings (" + el
1110 + eaMol.get(el) + " > " + criterion + ").");
1111 return false;
1112 }
1113 }
1114 }
1115 }
1116
// Lower bounds: a single criterion, where every listed element must be
// present in the fragment with a sufficient count.
1117 Map<String,Double> criterion = settings.getRejectedFormulaLessThan();
1118 for (String el : criterion.keySet())
1119 {
1120 if (!eaMol.containsKey(el))
1121 {
1122 logger.log(Level.FINE,"Removing fragment that does not "
1123 + "contain '" + el + "' as requested by formula"
1124 + "-based (less-than) settings.");
1125 return false;
1126 } else {
1127 // 0.5 to make it strictly more-than
1128 if (eaMol.get(el) - criterion.get(el) < -0.5)
1129 {
1130 logger.log(Level.FINE,"Removing fragment that "
1131 + "contains too little '" + el + "' "
1132 + "as requested by formula"
1133 + "-based settings (" + el
1134 + eaMol.get(el) + " < " + criterion + ").");
1135 return false;
1136 }
1137 }
1138 }
1139
1140 }
1141
// Rejection by APClass: the comparison is a prefix match on the string
// representation of the APClass.
1142 if (settings.getRejectedAPClasses().size() > 0)
1143 {
1144 for (APClass apc : frag.getAllAPClasses())
1145 {
1146 for (String s : settings.getRejectedAPClasses())
1147 {
1148 if (apc.toString().startsWith(s))
1149 {
1150 logger.log(Level.FINE,"Removing fragment with APClass "
1151 + apc);
1152 return false;
1153 }
1154 }
1155 }
1156 }
1157
// Rejection by combination of APClasses: a combination causes rejection
// only if each of its conditions is matched by at least one APClass.
1158 if (settings.getRejectedAPClassCombinations().size() > 0)
1159 {
1160 loopOverCombinations:
1161 for (String[] conditions : settings.getRejectedAPClassCombinations())
1162 {
1163 for (int ip=0; ip<conditions.length; ip++)
1164 {
1165 String condition = conditions[ip];
1166 boolean found = false;
1167 for (APClass apc : frag.getAllAPClasses())
1168 {
1169 if (apc.toString().startsWith(condition))
1170 {
1171 found = true;
// NB: this only moves to the next APClass; the scan does not stop at the
// first hit.
1172 continue;
1173 }
1174 }
1175 if (!found)
1176 continue loopOverCombinations;
1177 // Here we do have at least one AP satisfying the condition.
1178 }
1179 // Here we manage or satisfy all conditions. Therefore, we can
1180 // reject this fragment
1181
1182 String allCondsAsString = "";
1183 for (int i=0; i<conditions.length; i++)
1184 allCondsAsString = allCondsAsString + " " + conditions[i];
1185
1186 logger.log(Level.FINE,"Removing fragment with combination of "
1187 + "APClasses matching '" + allCondsAsString + "'.");
1188 return false;
1189 }
1190 }
1191
// Limits on the number of heavy atoms; a limit is active only if positive.
1192 if (settings.getMaxFragHeavyAtomCount()>0
1193 || settings.getMinFragHeavyAtomCount()>0)
1194 {
1195 int totHeavyAtm = 0;
1196 for (IAtom atm : frag.atoms())
1197 {
1198 if (MoleculeUtils.isElement(atm))
1199 {
1200 String symb = MoleculeUtils.getSymbolOrLabel(atm);
// NOTE(review): listing line 1202 (the end of this condition) is absent
// from this extract; the visible part excludes "H" from the count.
1201 if ((!symb.equals("H")) && (!symb.equals(
1203 totHeavyAtm++;
1204 }
1205 }
1206 if (settings.getMaxFragHeavyAtomCount() > 0
1207 && totHeavyAtm > settings.getMaxFragHeavyAtomCount())
1208 {
// NOTE(review): the message prints " < " although the test is "greater
// than the maximum".
1209 logger.log(Level.FINE,"Removing fragment with too many atoms ("
1210 + totHeavyAtm + " < "
1211 + settings.getMaxFragHeavyAtomCount()
1212 + ")");
1213 return false;
1214 }
1215 if (settings.getMinFragHeavyAtomCount() > 0
1216 && totHeavyAtm < settings.getMinFragHeavyAtomCount())
1217 {
1218 logger.log(Level.FINE,"Removing fragment with too few atoms ("
1219 + totHeavyAtm + " < "
1220 + settings.getMinFragHeavyAtomCount()
1221 + ")");
1222 return false;
1223 }
1224 }
1225
1226 if (settings.getFragRejectionSMARTS().size() > 0)
1227 {
// NOTE(review): listing line 1228 (the head of the statement creating
// 'msq') is absent from this extract.
1229 settings.getFragRejectionSMARTS());
// Problems with the queries are only logged: the evaluation goes on.
1230 if (msq.hasProblems())
1231 {
1232 logger.log(Level.WARNING,"Problems evaluating SMARTS-based "
1233 + "rejection criteria. " + msq.getMessage());
1234 }
1235
1236 for (String criterion : settings.getFragRejectionSMARTS().keySet())
1237 {
1238 if (msq.getNumMatchesOfQuery(criterion)>0)
1239 {
1240 logger.log(Level.FINE,"Removing fragment that matches "
1241 + "SMARTS-based rejection criteria '" + criterion
1242 + "'.");
1243 return false;
1244 }
1245 }
1246 }
1247
1248 if (settings.getFragRetentionSMARTS().size() > 0)
1249 {
// NOTE(review): listing line 1250 (the head of the statement creating
// 'msq') is absent from this extract.
1251 settings.getFragRetentionSMARTS());
// NOTE(review): the message below says "rejection criteria" although the
// queries evaluated here are the retention ones.
1252 if (msq.hasProblems())
1253 {
1254 logger.log(Level.WARNING,"Problems evaluating SMARTS-based "
1255 + "rejection criteria. " + msq.getMessage());
1256 }
1257
// The fragment is retained if it matches at least one retention query.
1258 boolean matchesAny = false;
1259 for (String criterion : settings.getFragRetentionSMARTS().keySet())
1260 {
1261 if (msq.getNumMatchesOfQuery(criterion) > 0)
1262 {
1263 matchesAny = true;
1264 break;
1265 }
1266 }
1267 if (!matchesAny)
1268 {
1269 logger.log(Level.FINE,"Removing fragment that does not "
1270 + "match any SMARTS-based retention criteria.");
1271 return false;
1272 }
1273 }
1274 return true;
1275 }
1276
1277//------------------------------------------------------------------------------
1278
1286 public static String getMWSlotIdentifier(Vertex frag, int slotSize)
1287 {
1288 for (IAtom a : frag.getIAtomContainer().atoms())
1289 {
1290 if (a.getImplicitHydrogenCount()==null)
1291 a.setImplicitHydrogenCount(0);
1292 }
1293 double mw = AtomContainerManipulator.getMass(frag.getIAtomContainer());
1294 int slotNum = (int) (mw / (Double.valueOf(slotSize)));
1295 return slotNum*slotSize + "-" + (slotNum+1)*slotSize;
1296 }
1297
1298//------------------------------------------------------------------------------
1299
1300 public static Vertex getRCPForAP(AttachmentPoint ap, APClass rcvApClass)
1301 throws DENOPTIMException
1302 {
1303 IAtomContainer mol = SilentChemObjectBuilder.getInstance()
1304 .newAtomContainer();
1305 Point3d apv = ap.getDirectionVector();
1306 mol.addAtom(new PseudoAtom(RingClosingAttractor.RCALABELPERAPCLASS.get(rcvApClass),
1307 new Point3d(
1308 Double.valueOf(apv.x),
1309 Double.valueOf(apv.y),
1310 Double.valueOf(apv.z))));
1311
1312 Fragment rcv = new Fragment(mol, BBType.FRAGMENT);
1313
1314 Point3d aps = MoleculeUtils.getPoint3d(
1315 ap.getOwner().getIAtomContainer().getAtom(
1316 ap.getAtomPositionNumber()));
1317 rcv.addAP(0, rcvApClass, new Point3d(
1318 Double.valueOf(aps.x),
1319 Double.valueOf(aps.y),
1320 Double.valueOf(aps.z)));
1321 return rcv;
1322 }
1323
1324//------------------------------------------------------------------------------
1325
1326}
General set of constants used in DENOPTIM.
static final Object FORMULASTR
Property name used to store molecular formula as string in an atom container.
static final String DUMMYATMSYMBOL
Symbol of dummy atom.
static final Object ISOMORPHICFAMILYID
Property used to store the identifier of the family of isomorphic fragments that owns a fragment.
Exception thrown when the format of a file is not recognized.
static Map< String, List< MatchedBond > > getMatchingBondsAllInOne(IAtomContainer mol, List< CuttingRule > rules, Logger logger)
Identification of the bonds matching a list of SMARTS queries.
static void checkElementalAnalysisAgainstFormula(File input, File output, Logger logger)
Processes all molecules analyzing the composition of the structure in the chemical representation as ...
static List< Vertex > fragmentation(IAtomContainer mol, List< CuttingRule > rules, Logger logger)
Chops one chemical structure by applying the given cutting rules.
static boolean filterFragment(Fragment frag, FragmenterParameters settings)
Filter fragments according to the criteria defined in the settings.
static Set< IAtom > exploreConnectivity(IAtom seed, IAtomContainer mol)
Explores the connectivity annotating which atoms have been visited.
static Vertex getRCPForAP(AttachmentPoint ap, APClass rcvApClass)
static boolean prepareMolToFragmentation(IAtomContainer mol, FragmenterParameters settings, int index)
Does any pre-processing on an IAtomContainer meant to be fragmented.
static void manageFragmentCollection(Vertex frag, int fragCounter, FragmenterParameters settings, List< Vertex > collector, Logger logger)
Management of fragments: includes application of fragment filters, rejection rules,...
static Set< IAtom > exploreHapticity(IAtom seed, IAtom centralAtom, ArrayList< IAtom > candidates, IAtomContainer mol)
Identifies non-central atoms involved in the same n-hapto ligand as the seed atom.
static boolean filterFragment(Fragment frag, FragmenterParameters settings, Logger logger)
Filter fragments according to the criteria defined in the settings.
static void filterStrucutresBySMARTS(File input, Set< String > smarts, File output, Logger logger)
Removes any structure that matches any of the given SMARTS queries.
static String getMWSlotIdentifier(Vertex frag, int slotSize)
Determines the name of the MW slot to use when comparing the given fragment with previously stored fr...
static void manageFragmentCollection(File input, FragmenterParameters settings, File output, Logger logger)
Management of fragments: includes application of fragment filters, rejection rules,...
static boolean fragmentation(File input, FragmenterParameters settings, File output, Logger logger)
Performs fragmentation according to the given cutting rules.
An attachment point (AP) is a possibility to attach a Vertex onto the vertex holding the AP (i....
Class representing a continuously connected portion of chemical object holding attachment points.
Definition: Fragment.java:61
void addAP(int atomPositionNumber)
Adds an attachment point with a dummy APClass.
Definition: Fragment.java:343
AttachmentPoint addAPOnAtom(IAtom srcAtm, APClass apc, Point3d vector)
Adds an attachment point to the specified atom.
Definition: Fragment.java:424
List< AttachmentPoint > getAttachmentPoints()
Definition: Fragment.java:1141
Fragment clone()
Returns a deep copy of this fragment.
Definition: Fragment.java:733
Iterable< IAtom > atoms()
Definition: Fragment.java:822
IAtomContainer getIAtomContainer()
Definition: Fragment.java:788
void removeAtoms(Collection< IAtom > atoms)
Removes a list of atoms and updates the list of attachment points.
Definition: Fragment.java:913
A vertex is a data structure that has an identity and holds a list of AttachmentPoints.
Definition: Vertex.java:62
ArrayList< APClass > getAllAPClasses()
Returns the list of all APClasses present on this vertex.
Definition: Vertex.java:793
Object getProperty(Object property)
Definition: Vertex.java:1224
abstract IAtomContainer getIAtomContainer()
void setProperty(Object key, Object property)
Definition: Vertex.java:1236
The RingClosingAttractor represents the available valence/connection that allows to close a ring.
static final HashMap< APClass, String > RCALABELPERAPCLASS
Conventional labels for attractor pseudoatom.
Utility methods for input/output.
static File writeVertexesToFile(File file, FileFormat format, List< Vertex > vertexes)
Writes vertexes to file.
static void writeSDFFile(String fileName, IAtomContainer mol)
Writes IAtomContainer to SDF file.
static File writeVertexToFile(File file, FileFormat format, Vertex vertex, boolean append)
Writes vertexes to file.
static ArrayList< Vertex > readVertexes(File file, Vertex.BBType bbt)
Reads Vertexes from any file that can contain such items.
An iterator that takes IAtomContainers from a file, possibly using an available iterating reader,...
void close()
Close the memory-efficient iterator if any is open.
Logger getLogger()
Get the name of the program specific logger.
A cutting rule with three SMARTS queries (atom 1, bond, atom2) and options.
Parameters controlling execution of the fragmenter.
boolean doRejectWeirdIsotopes
Flag requesting to reject fragments with minor isotopes.
boolean addExplicitH
Flag requesting to add explicit H atoms.
Boolean satisfiesRuleOptions
Flag indicating that we have checked the additional option from the cutting rule (otherwise this flag...
Tool to add/remove dummy atoms from linearities or multi-hapto sites.
static void addDummiesOnLinearities(Fragment frag, double angLim)
Append dummy atoms on otherwise linear arrangements of atoms.
Utilities for manipulating molecular formulas.
static boolean compareFormulaAndElementalAnalysis(String formula, IAtomContainer mol)
Compares the molecular formula formatted as from the Cambridge Structural Database (CSD) against the ...
static Map< String, Double > getElementalanalysis(IAtomContainer mol)
Treats Deuterium as a different element than Hydrogen.
Container of lists of atoms matching a list of SMARTS.
Map< String, Mappings > getAllMatches()
int getNumMatchesOfQuery(String query)
Utilities for molecule conversion.
static void setZeroImplicitHydrogensToAllAtoms(IAtomContainer iac)
Sets zero implicit hydrogen count to all atoms.
static int getDimensions(IAtomContainer mol)
Determines the dimensionality of the given chemical object.
static String getSymbolOrLabel(IAtom atm)
Gets either the elemental symbol (for standard atoms) or the label (for pseudo-atoms).
static void ensureNoUnsetBondOrders(IAtomContainer iac)
Sets bond order = single to all otherwise unset bonds.
static void explicitHydrogens(IAtomContainer mol)
Converts all the implicit hydrogens to explicit.
static Point3d getPoint3d(IAtom atm)
Return the 3D coordinates, if present.
static boolean isElement(IAtom atom)
Check element symbol corresponds to real element of Periodic Table.
File formats identified by DENOPTIM.
Definition: FileFormat.java:32
The type of building block.
Definition: Vertex.java:87
FRG_PARAMS
Parameters controlling the fragmenter.