1package denoptim.fragmenter;
4import java.io.FileInputStream;
5import java.io.IOException;
6import java.util.ArrayList;
7import java.util.HashMap;
8import java.util.HashSet;
12import java.util.logging.Level;
13import java.util.logging.Logger;
15import javax.vecmath.Point3d;
17import org.openscience.cdk.Bond;
18import org.openscience.cdk.DefaultChemObjectBuilder;
19import org.openscience.cdk.PseudoAtom;
20import org.openscience.cdk.config.Isotopes;
21import org.openscience.cdk.exception.CDKException;
22import org.openscience.cdk.interfaces.IAtom;
23import org.openscience.cdk.interfaces.IAtomContainer;
24import org.openscience.cdk.interfaces.IBond;
25import org.openscience.cdk.interfaces.IIsotope;
26import org.openscience.cdk.io.iterator.IteratingSDFReader;
27import org.openscience.cdk.isomorphism.Mappings;
28import org.openscience.cdk.silent.SilentChemObjectBuilder;
29import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
31import denoptim.constants.DENOPTIMConstants;
32import denoptim.exception.DENOPTIMException;
33import denoptim.files.FileFormat;
34import denoptim.files.UndetectedFileFormatException;
35import denoptim.graph.APClass;
36import denoptim.graph.AttachmentPoint;
37import denoptim.graph.Fragment;
38import denoptim.graph.Vertex;
39import denoptim.graph.Vertex.BBType;
40import denoptim.graph.rings.RingClosingAttractor;
41import denoptim.io.DenoptimIO;
42import denoptim.io.IteratingAtomContainerReader;
43import denoptim.programs.RunTimeParameters.ParametersType;
44import denoptim.programs.fragmenter.CuttingRule;
45import denoptim.programs.fragmenter.FragmenterParameters;
46import denoptim.programs.fragmenter.MatchedBond;
47import denoptim.utils.DummyAtomHandler;
48import denoptim.utils.FormulaUtils;
49import denoptim.utils.ManySMARTSQuery;
50import denoptim.utils.MoleculeUtils;
72 File output, Logger logger)
75 FileInputStream fis =
new FileInputStream(input);
76 IteratingSDFReader reader =
new IteratingSDFReader(fis,
77 DefaultChemObjectBuilder.getInstance());
80 int maxBufferSize = 2000;
81 ArrayList<IAtomContainer> buffer =
new ArrayList<IAtomContainer>(500);
83 while (reader.hasNext())
88 logger.log(Level.FINE,
"Checking elemental analysis of "
89 +
"structure " + index);
91 IAtomContainer mol = reader.next();
95 +
"' not found in molecule " + index +
" in file "
96 + input +
". Cannot compare formula with elemental"
109 logger.log(Level.INFO,
"Inconsistency between elemental "
110 +
"analysis of structure and molecular formula."
111 +
" Rejecting structure " + index +
": "
117 if (buffer.size() >= maxBufferSize)
128 if (buffer.size() < maxBufferSize)
163 }
catch (CDKException e)
165 if (e.getMessage().contains(
"Cannot assign Kekulé structure"))
169 settings.
getLogger().log(Level.WARNING,
"Some bond order "
170 +
"are unset and attempt to kekulize the "
171 +
"system has failed "
172 +
"for structure " + index +
". "
173 +
"This hampers use of SMARTS queries, which "
175 +
"not work as expected. Structure " + index
176 +
" will be rejected. "
177 +
"You can avoid rejection by using "
180 +
"UNSETTOSINGLEBO, but you'll "
181 +
"still be using a peculiar connectivity "
183 +
"many bonds are artificially marked as "
185 +
"avoid use of 'UNSET' bond order. "
186 +
"Further details on the problem: "
190 settings.
getLogger().log(Level.WARNING,
"Failed "
192 +
"for structure " + index
193 +
" but UNSETTOSINGLEBO "
194 +
"keyword used. Forcing use of single bonds to "
195 +
"replace bonds with unset order.");
196 for (IBond bnd : mol.bonds())
198 if (bnd.getOrder().equals(IBond.Order.UNSET))
200 bnd.setOrder(IBond.Order.SINGLE);
223 File output, Logger logger)
226 FileInputStream fis =
new FileInputStream(input);
227 IteratingSDFReader reader =
new IteratingSDFReader(fis,
228 DefaultChemObjectBuilder.getInstance());
231 Map<String, String> smartsMap =
new HashMap<String, String>();
232 for (String s : smarts)
235 smartsMap.put(
"prefilter-"+i, s);
239 int maxBufferSize = 2000;
240 ArrayList<IAtomContainer> buffer =
new ArrayList<IAtomContainer>(500);
242 while (reader.hasNext())
247 logger.log(Level.FINE,
"Prefiltering structure " + index);
249 IAtomContainer mol = reader.next();
254 String msg =
"WARNING! Problems while searching for "
255 +
"specific atoms/bonds using SMARTS: "
261 if (allMatches.size()==0)
266 for (String s : allMatches.keySet())
267 hits = hits + DenoptimIO.NL + smartsMap.get(s);
270 logger.log(Level.INFO,
"Found match for " + hits
271 +
"Rejecting structure " + index +
": "
277 if (buffer.size() >= maxBufferSize)
287 if (buffer.size() < maxBufferSize)
316 File output, Logger logger)
throws CDKException, IOException,
331 logger.log(Level.FINE,
"Fragmenting structure " + index);
333 IAtomContainer mol = iterator.
next();
334 String molName =
"noname-mol" + index;
335 if (mol.getTitle()!=
null && !mol.getTitle().isBlank())
336 molName = mol.getTitle();
340 settings.getCuttingRules(),
344 logger.log(Level.FINE,
"Fragmentation produced "
345 + fragments.size() +
" fragments.");
347 totalProd += fragments.size();
350 List<Vertex> keptFragments =
new ArrayList<Vertex>();
352 for (
Vertex frag : fragments)
355 String fragIdStr =
"From_" + molName +
"_" + fragCounter;
356 frag.setProperty(
"cdk:Title", fragIdStr);
359 keptFragments, logger);
363 logger.log(Level.FINE,
"Fragments surviving post-"
364 +
"processing: " + keptFragments.size());
366 totalKept += keptFragments.size();
367 if (!settings.doManageIsomorphicFamilies() && totalKept>0)
382 logger.log(Level.WARNING,
"No fragment produced. Cutting rules "
383 +
"were ineffective on the given structures.");
386 }
else if (totalKept==0)
390 logger.log(Level.WARNING,
"No fragment kept out of " + totalProd
391 +
" produced fragments. Filtering criteria might be "
392 +
"too restrictive.");
416 Map<String, List<MatchedBond>> matchingbonds =
423 String ruleName = rule.getName();
426 if (!matchingbonds.keySet().contains(ruleName))
431 IAtom atmA = tb.getAtmSubClass0();
432 IAtom atmB = tb.getAtmSubClass1();
435 if (!fragsMol.getConnectedAtomsList(atmA).contains(atmB))
446 IAtom centralAtm = atmA;
450 ArrayList<IAtom> candidatesForHapto =
new ArrayList<IAtom>();
451 for (
MatchedBond tbForHapto : matchingbonds.get(ruleName))
454 if (tbForHapto.getAtmSubClass0() == centralAtm)
455 candidatesForHapto.add(tbForHapto.getAtmSubClass1());
460 Set<IAtom> atmsInHapto =
new HashSet<IAtom>();
461 atmsInHapto.add(tb.getAtmSubClass1());
463 centralAtm, candidatesForHapto, fragsMol);
464 if (atmsInHapto.size() == 1)
466 logger.log(Level.WARNING,
"Unable to find more than one "
467 +
"bond involved in high-hapticity ligand! "
473 boolean isSystemIntact =
true;
474 for (IAtom ligAtm : atmsInHapto)
476 List<IAtom> nbrsOfLigAtm =
477 fragsMol.getConnectedAtomsList(ligAtm);
478 if (!nbrsOfLigAtm.contains(centralAtm))
480 isSystemIntact =
false;
492 Point3d dummyP3d =
new Point3d();
493 for (IAtom ligAtm : atmsInHapto)
496 dummyP3d.x = dummyP3d.x + ligP3d.x;
497 dummyP3d.y = dummyP3d.y + ligP3d.y;
498 dummyP3d.z = dummyP3d.z + ligP3d.z;
501 dummyP3d.x = dummyP3d.x / (double) atmsInHapto.size();
502 dummyP3d.y = dummyP3d.y / (double) atmsInHapto.size();
503 dummyP3d.z = dummyP3d.z / (double) atmsInHapto.size();
507 IAtom dummyAtm =
null;
508 for (IAtom oldDu : fragsMol.atoms())
513 Point3d oldDuP3d = oldDu.getPoint3d();
514 if (oldDuP3d.distance(dummyP3d) < 0.002)
525 dummyAtm.setPoint3d(dummyP3d);
526 fragsMol.addAtom(dummyAtm);
532 IBond.Order border = IBond.Order.valueOf(
"SINGLE");
534 for (IAtom ligAtm : atmsInHapto)
536 List<IAtom> nbrsOfDu = fragsMol.getConnectedAtomsList(
538 if (!nbrsOfDu.contains(ligAtm))
541 Bond bnd =
new Bond(dummyAtm,ligAtm,border);
542 fragsMol.addBond(bnd);
545 IBond oldBnd = fragsMol.getBond(centralAtm,ligAtm);
546 fragsMol.removeBond(oldBnd);
563 IBond bnd = fragsMol.getBond(atmA,atmB);
564 fragsMol.removeBond(bnd);
581 ArrayList<Vertex> fragments =
new ArrayList<Vertex>();
582 Set<Integer> doneAlready =
new HashSet<Integer>();
585 if (doneAlready.contains(idx))
591 atmsToKeep.stream().forEach(atm -> doneAlready.add(iac.indexOf(atm)));
593 Set<IAtom> atmsToRemove =
new HashSet<IAtom>();
594 for (IAtom atm : cloneOfMaster.
atoms())
596 if (!atmsToKeep.contains(atm))
598 atmsToRemove.add(atm);
603 fragments.add(cloneOfMaster);
624 ArrayList<IAtom> candidates, IAtomContainer mol)
626 Set<IAtom> atmsInHapto =
new HashSet<IAtom>();
627 atmsInHapto.add(seed);
628 ArrayList<IAtom> toVisitAtoms =
new ArrayList<IAtom>();
629 toVisitAtoms.add(seed);
630 ArrayList<IAtom> visitedAtoms =
new ArrayList<IAtom>();
631 while (toVisitAtoms.size()>0)
633 ArrayList<IAtom> toVisitLater =
new ArrayList<IAtom>();
634 for (IAtom atomInFocus : toVisitAtoms)
636 if (visitedAtoms.contains(atomInFocus)
637 || atomInFocus==centralAtom)
640 visitedAtoms.add(atomInFocus);
642 if (candidates.contains(atomInFocus))
644 atmsInHapto.add(atomInFocus);
645 toVisitLater.addAll(mol.getConnectedAtomsList(atomInFocus));
648 toVisitAtoms.clear();
649 toVisitAtoms.addAll(toVisitLater);
665 Set<IAtom> atmsReachableFromSeed =
new HashSet<IAtom>();
666 ArrayList<IAtom> toVisitAtoms =
new ArrayList<IAtom>();
667 toVisitAtoms.add(seed);
668 ArrayList<IAtom> visitedAtoms =
new ArrayList<IAtom>();
669 while (toVisitAtoms.size()>0)
671 ArrayList<IAtom> toVisitLater =
new ArrayList<IAtom>();
672 for (IAtom atomInFocus : toVisitAtoms)
674 if (visitedAtoms.contains(atomInFocus))
677 visitedAtoms.add(atomInFocus);
679 atmsReachableFromSeed.add(atomInFocus);
680 toVisitLater.addAll(mol.getConnectedAtomsList(atomInFocus));
682 toVisitAtoms.clear();
683 toVisitAtoms.addAll(toVisitLater);
685 return atmsReachableFromSeed;
699 IAtomContainer mol, List<CuttingRule> rules, Logger logger)
702 Map<String,String> smarts =
new HashMap<String,String>();
705 smarts.put(rule.getName(),rule.getWholeSMARTSRule());
709 Map<String, List<MatchedBond>> bondsMatchingRules =
710 new HashMap<String, List<MatchedBond>>();
714 if (msq.hasProblems())
718 logger.log(Level.WARNING,
"Problem matching SMARTS: "
721 return bondsMatchingRules;
726 String ruleName = rule.getName();
728 if (msq.getNumMatchesOfQuery(ruleName) == 0)
734 Mappings purgedPairs = msq.getMatchesOfSMARTS(ruleName);
737 ArrayList<MatchedBond> bondsMatched =
new ArrayList<MatchedBond>();
738 for (
int[] pair : purgedPairs)
742 throw new Error(
"Cutting rule: " + ruleName
743 +
" has identified " + pair.length +
" atoms "
744 +
"instead of 2. Modify rule to make it find a "
748 mol.getAtom(pair[1]), rule);
752 bondsMatched.add(tb);
755 if (!bondsMatched.isEmpty())
756 bondsMatchingRules.put(ruleName, bondsMatched);
759 return bondsMatchingRules;
774 FileInputStream fis =
new FileInputStream(input);
775 IteratingSDFReader reader =
new IteratingSDFReader(fis,
776 DefaultChemObjectBuilder.getInstance());
779 int maxBufferSize = 2000;
780 ArrayList<Vertex> buffer =
new ArrayList<Vertex>(500);
782 while (reader.hasNext())
787 logger.log(Level.FINE,
"Processing fragment " + index);
794 if (buffer.size() >= maxBufferSize)
804 if (buffer.size() < maxBufferSize)
833 List<Vertex> collector, Logger logger)
844 if (settings.getIgnorableFragments().size() > 0)
846 if (settings.getIgnorableFragments().stream()
847 .anyMatch(ignorable -> ((
Fragment)frag)
848 .isIsomorphicTo(ignorable)))
852 logger.log(Level.FINE,
"Fragment " + fragCounter
860 if (settings.getTargetFragments().size() > 0)
862 if (!settings.getTargetFragments().stream()
863 .anyMatch(ignorable -> ((
Fragment)frag)
864 .isIsomorphicTo(ignorable)))
868 logger.log(Level.FINE,
"Fragment " + fragCounter
869 +
" doesn't match any target: rejected.");
877 && settings.doAddDuOnLinearity())
880 settings.getLinearAngleLimit());
887 if (settings.doManageIsomorphicFamilies())
889 synchronized (settings.MANAGEMWSLOTSSLOCK)
892 settings.getMWSlotSize());
894 File mwFileUnq = settings.getMWSlotFileNameUnqFrags(
896 File mwFileAll = settings.getMWSlotFileNameAllFrags(
901 if (mwFileUnq.exists())
903 ArrayList<Vertex> knownFrags =
905 unqVersion = knownFrags.stream()
907 ((
Fragment)frag).isIsomorphicTo(knownFrag))
911 if (unqVersion!=
null)
920 int sampleSize = settings.getIsomorphsCount()
922 if (sampleSize < settings.getIsomorphicSampleSize())
928 settings.getIsomorphsCount().put(isoFamID,
953 String isoFamID = settings.newIsomorphicFamilyID();
957 settings.getIsomorphsCount().put(isoFamID, 1);
1005 for (IAtom atm : frag.
atoms())
1016 logger.log(Level.FINE,
"Removing fragment contains non-element '"
1026 Point3d ap3d = ap.getDirectionVector();
1029 for (IAtom atm : frag.
atoms())
1032 double dist = ap3d.distance(atm3d);
1035 logger.log(Level.FINE,
"Removing fragment with AP"
1047 for (IAtom atm : frag.
atoms())
1052 if (atm.getMassNumber() ==
null)
1056 int a = atm.getMassNumber();
1058 IIsotope major = Isotopes.getInstance().getMajorIsotope(symb);
1059 if (a != major.getMassNumber())
1061 logger.log(Level.FINE,
"Removing fragment containing "
1062 +
"isotope "+symb+a+
".");
1065 }
catch (Throwable t) {
1066 logger.log(Level.WARNING,
"Not able to perform Isotope"
1078 for (IAtom atm : frag.
atoms())
1083 logger.log(Level.FINE,
"Removing fragment containing '"
1096 for (Map<String,Double> criterion :
1099 for (String el : criterion.keySet())
1101 if (eaMol.containsKey(el))
1104 if (eaMol.get(el) - criterion.get(el) > 0.5)
1106 logger.log(Level.FINE,
"Removing fragment that "
1107 +
"contains too much '" + el +
"' "
1108 +
"as requested by formula"
1109 +
"-based (more-than) settings (" + el
1110 + eaMol.get(el) +
" > " + criterion +
").");
1118 for (String el : criterion.keySet())
1120 if (!eaMol.containsKey(el))
1122 logger.log(Level.FINE,
"Removing fragment that does not "
1123 +
"contain '" + el +
"' as requested by formula"
1124 +
"-based (less-than) settings.");
1128 if (eaMol.get(el) - criterion.get(el) < -0.5)
1130 logger.log(Level.FINE,
"Removing fragment that "
1131 +
"contains too little '" + el +
"' "
1132 +
"as requested by formula"
1133 +
"-based settings (" + el
1134 + eaMol.get(el) +
" < " + criterion +
").");
1148 if (apc.toString().startsWith(s))
1150 logger.log(Level.FINE,
"Removing fragment with APClass "
1160 loopOverCombinations:
1163 for (
int ip=0; ip<conditions.length; ip++)
1165 String condition = conditions[ip];
1166 boolean found =
false;
1169 if (apc.toString().startsWith(condition))
1176 continue loopOverCombinations;
1182 String allCondsAsString =
"";
1183 for (
int i=0; i<conditions.length; i++)
1184 allCondsAsString = allCondsAsString +
" " + conditions[i];
1186 logger.log(Level.FINE,
"Removing fragment with combination of "
1187 +
"APClasses matching '" + allCondsAsString +
"'.");
1195 int totHeavyAtm = 0;
1196 for (IAtom atm : frag.
atoms())
1201 if ((!symb.equals(
"H")) && (!symb.equals(
1209 logger.log(Level.FINE,
"Removing fragment with too many atoms ("
1210 + totHeavyAtm +
" < "
1218 logger.log(Level.FINE,
"Removing fragment with too few atoms ("
1219 + totHeavyAtm +
" < "
1232 logger.log(Level.WARNING,
"Problems evaluating SMARTS-based "
1233 +
"rejection criteria. " + msq.
getMessage());
1240 logger.log(Level.FINE,
"Removing fragment that matches "
1241 +
"SMARTS-based rejection criteria '" + criterion
1254 logger.log(Level.WARNING,
"Problems evaluating SMARTS-based "
1255 +
"rejection criteria. " + msq.
getMessage());
1258 boolean matchesAny =
false;
1269 logger.log(Level.FINE,
"Removing fragment that does not "
1270 +
"match any SMARTS-based retention criteria.");
1290 if (a.getImplicitHydrogenCount()==
null)
1291 a.setImplicitHydrogenCount(0);
1294 int slotNum = (int) (mw / (Double.valueOf(slotSize)));
1295 return slotNum*slotSize +
"-" + (slotNum+1)*slotSize;
1303 IAtomContainer mol = SilentChemObjectBuilder.getInstance()
1304 .newAtomContainer();
1305 Point3d apv = ap.getDirectionVector();
1308 Double.valueOf(apv.x),
1309 Double.valueOf(apv.y),
1310 Double.valueOf(apv.z))));
1315 ap.getOwner().getIAtomContainer().getAtom(
1316 ap.getAtomPositionNumber()));
1317 rcv.
addAP(0, rcvApClass,
new Point3d(
1318 Double.valueOf(aps.x),
1319 Double.valueOf(aps.y),
1320 Double.valueOf(aps.z)));
General set of constants used in DENOPTIM.
static final Object FORMULASTR
Property name used to store molecular formula as string in an atom container.
static final String DUMMYATMSYMBOL
Symbol of dummy atom.
static final Object ISOMORPHICFAMILYID
Property used to store the identifier of the family of isomorphic fragments that owns a fragment.
An attachment point (AP) is a possibility to attach a Vertex onto the vertex holding the AP (i....
Class representing a continuously connected portion of chemical object holding attachment points.
void addAP(int atomPositionNumber)
Adds an attachment point with a dummy APClass.
AttachmentPoint addAPOnAtom(IAtom srcAtm, APClass apc, Point3d vector)
Add an attachment point to the specifies atom.
List< AttachmentPoint > getAttachmentPoints()
Fragment clone()
Returns a deep copy of this fragments.
Iterable< IAtom > atoms()
IAtomContainer getIAtomContainer()
void removeAtoms(Collection< IAtom > atoms)
Removes a list of atoms and updates the list of attachment points.
A vertex is a data structure that has an identity and holds a list of AttachmentPoints.
ArrayList< APClass > getAllAPClasses()
Returns the list of all APClasses present on this vertex.
Object getProperty(Object property)
abstract IAtomContainer getIAtomContainer()
void setProperty(Object key, Object property)
The RingClosingAttractor represent the available valence/connection that allows to close a ring.
static final HashMap< APClass, String > RCALABELPERAPCLASS
Conventional labels for attractor pseudoatom.
Utility methods for input/output.
static File writeVertexesToFile(File file, FileFormat format, List< Vertex > vertexes)
Writes vertexes to file.
static void writeSDFFile(String fileName, IAtomContainer mol)
Writes IAtomContainer to SDF file.
static File writeVertexToFile(File file, FileFormat format, Vertex vertex, boolean append)
Writes vertexes to file.
static ArrayList< Vertex > readVertexes(File file, Vertex.BBType bbt)
Reads Vertexes from any file that can contain such items.
An iterator that take IAtomContainers from a file, possibly using an available iterating reader,...
void close()
Close the memory-efficient iterator if any is open.
Logger getLogger()
Get the name of the program specific logger.
A cutting rule with three SMARTS queries (atom 1, bond, atom2) and options.
Parameters controlling execution of the fragmenter.
boolean doRejectWeirdIsotopes
Flag requesting to reject fragments with minor isotopes.
Map< String, Double > getRejectedFormulaLessThan()
int getMinFragHeavyAtomCount()
Set< String > getRejectedElements()
Map< String, String > getFragRetentionSMARTS()
int getMaxFragHeavyAtomCount()
Set< Map< String, Double > > getRejectedFormulaMoreThan()
Map< String, String > getFragRejectionSMARTS()
Set< String[]> getRejectedAPClassCombinations()
boolean addExplicitH
Flag requesting to add explicit H atoms.
Set< String > getRejectedAPClasses()
boolean acceptUnsetToSingeBO()
Boolean satisfiesRuleOptions
Flag indicating that we have checked the additional option from the cutting rule (otherwise this flag...
Toll to add/remove dummy atoms from linearities or multi-hapto sites.
static void addDummiesOnLinearities(Fragment frag, double angLim)
Append dummy atoms on otherwise linear arrangements of atoms.
Container of lists of atoms matching a list of SMARTS.
Map< String, Mappings > getAllMatches()
int getNumMatchesOfQuery(String query)
Utilities for molecule conversion.
static void setZeroImplicitHydrogensToAllAtoms(IAtomContainer iac)
Sets zero implicit hydrogen count to all atoms.
static int getDimensions(IAtomContainer mol)
Determines the dimensionality of the given chemical object.
static String getSymbolOrLabel(IAtom atm)
Gets either the elemental symbol (for standard atoms) of the label (for pseudo-atoms).
static void ensureNoUnsetBondOrders(IAtomContainer iac)
Sets bond order = single to all otherwise unset bonds.
static void explicitHydrogens(IAtomContainer mol)
Converts all the implicit hydrogens to explicit.
static Point3d getPoint3d(IAtom atm)
Return the 3D coordinates, if present.
static boolean isElement(IAtom atom)
Check element symbol corresponds to real element of Periodic Table.
The type of building block.
Identifier of the type of parameters.
FRG_PARAMS
Parameters controlling the fragmenter.