1package denoptim.fragmenter;
4import java.io.FileInputStream;
5import java.io.IOException;
6import java.util.ArrayList;
7import java.util.HashMap;
8import java.util.HashSet;
12import java.util.logging.Level;
13import java.util.logging.Logger;
15import javax.vecmath.Point3d;
17import org.openscience.cdk.Bond;
18import org.openscience.cdk.DefaultChemObjectBuilder;
19import org.openscience.cdk.PseudoAtom;
20import org.openscience.cdk.config.Isotopes;
21import org.openscience.cdk.exception.CDKException;
22import org.openscience.cdk.interfaces.IAtom;
23import org.openscience.cdk.interfaces.IAtomContainer;
24import org.openscience.cdk.interfaces.IBond;
25import org.openscience.cdk.interfaces.IIsotope;
26import org.openscience.cdk.io.iterator.IteratingSDFReader;
27import org.openscience.cdk.isomorphism.Mappings;
28import org.openscience.cdk.silent.SilentChemObjectBuilder;
29import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
31import denoptim.constants.DENOPTIMConstants;
32import denoptim.exception.DENOPTIMException;
33import denoptim.files.FileFormat;
34import denoptim.files.UndetectedFileFormatException;
35import denoptim.graph.APClass;
36import denoptim.graph.AttachmentPoint;
37import denoptim.graph.Fragment;
38import denoptim.graph.Vertex;
39import denoptim.graph.Vertex.BBType;
40import denoptim.graph.rings.RingClosingAttractor;
41import denoptim.io.DenoptimIO;
42import denoptim.io.IteratingAtomContainerReader;
43import denoptim.programs.RunTimeParameters.ParametersType;
44import denoptim.programs.fragmenter.CuttingRule;
45import denoptim.programs.fragmenter.FragmenterParameters;
46import denoptim.programs.fragmenter.MatchedBond;
47import denoptim.utils.DummyAtomHandler;
48import denoptim.utils.FormulaUtils;
49import denoptim.utils.ManySMARTSQuery;
50import denoptim.utils.MoleculeUtils;
72 File output, Logger logger)
75 FileInputStream fis =
new FileInputStream(input);
76 IteratingSDFReader reader =
new IteratingSDFReader(fis,
77 DefaultChemObjectBuilder.getInstance());
80 int maxBufferSize = 2000;
81 ArrayList<IAtomContainer> buffer =
new ArrayList<IAtomContainer>(500);
83 while (reader.hasNext())
88 logger.log(Level.FINE,
"Checking elemental analysis of "
89 +
"structure " + index);
91 IAtomContainer mol = reader.next();
95 +
"' not found in molecule " + index +
" in file "
96 + input +
". Cannot compare formula with elemental"
109 logger.log(Level.INFO,
"Inconsistency between elemental "
110 +
"analysis of structure and molecular formula."
111 +
" Rejecting structure " + index +
": "
117 if (buffer.size() >= maxBufferSize)
128 if (buffer.size() < maxBufferSize)
163 }
catch (CDKException e)
167 settings.
getLogger().log(Level.WARNING,
"Some bond order "
168 +
"are unset and attempt to kekulize the "
169 +
"system has failed "
170 +
"for structure " + index +
"."
171 +
"This hampers use of SMARTS queries, which "
173 +
"not work as expected. Structure " + index
175 +
"be rejected. You can avoid rejection by using "
178 +
"UNSETTOSINGLEBO, but you'll "
179 +
"still be using a peculiar connectivity "
181 +
"many bonds are artificially markes as "
183 +
"avoid use of 'UNSET' bond order. "
184 +
"Further details on the problem: "
188 settings.
getLogger().log(Level.WARNING,
"Failed "
190 +
"for structure " + index
191 +
" but UNSETTOSINGLEBO "
192 +
"keyword used. Forcing use of single bonds to "
193 +
"replace bonds with unset order.");
194 for (IBond bnd : mol.bonds())
196 if (bnd.getOrder().equals(IBond.Order.UNSET))
198 bnd.setOrder(IBond.Order.SINGLE);
220 File output, Logger logger)
223 FileInputStream fis =
new FileInputStream(input);
224 IteratingSDFReader reader =
new IteratingSDFReader(fis,
225 DefaultChemObjectBuilder.getInstance());
228 Map<String, String> smartsMap =
new HashMap<String, String>();
229 for (String s : smarts)
232 smartsMap.put(
"prefilter-"+i, s);
236 int maxBufferSize = 2000;
237 ArrayList<IAtomContainer> buffer =
new ArrayList<IAtomContainer>(500);
239 while (reader.hasNext())
244 logger.log(Level.FINE,
"Prefiltering structure " + index);
246 IAtomContainer mol = reader.next();
251 String msg =
"WARNING! Problems while searching for "
252 +
"specific atoms/bonds using SMARTS: "
258 if (allMatches.size()==0)
263 for (String s : allMatches.keySet())
264 hits = hits + DenoptimIO.NL + smartsMap.get(s);
267 logger.log(Level.INFO,
"Found match for " + hits
268 +
"Rejecting structure " + index +
": "
274 if (buffer.size() >= maxBufferSize)
284 if (buffer.size() < maxBufferSize)
313 File output, Logger logger)
throws CDKException, IOException,
328 logger.log(Level.FINE,
"Fragmenting structure " + index);
330 IAtomContainer mol = iterator.
next();
331 String molName =
"noname-mol" + index;
332 if (mol.getTitle()!=
null && !mol.getTitle().isBlank())
333 molName = mol.getTitle();
337 settings.getCuttingRules(),
341 logger.log(Level.FINE,
"Fragmentation produced "
342 + fragments.size() +
" fragments.");
344 totalProd += fragments.size();
347 List<Vertex> keptFragments =
new ArrayList<Vertex>();
349 for (
Vertex frag : fragments)
352 String fragIdStr =
"From_" + molName +
"_" + fragCounter;
353 frag.setProperty(
"cdk:Title", fragIdStr);
356 keptFragments, logger);
360 logger.log(Level.FINE,
"Fragments surviving post-"
361 +
"processing: " + keptFragments.size());
363 totalKept += keptFragments.size();
364 if (!settings.doManageIsomorphicFamilies() && totalKept>0)
379 logger.log(Level.WARNING,
"No fragment produced. Cutting rules "
380 +
"were ineffective on the given structures.");
383 }
else if (totalKept==0)
387 logger.log(Level.WARNING,
"No fragment kept out of " + totalProd
388 +
" produced fragments. Filtering criteria might be "
389 +
"too restrictive.");
413 Map<String, List<MatchedBond>> matchingbonds =
420 String ruleName = rule.getName();
423 if (!matchingbonds.keySet().contains(ruleName))
428 IAtom atmA = tb.getAtmSubClass0();
429 IAtom atmB = tb.getAtmSubClass1();
432 if (!fragsMol.getConnectedAtomsList(atmA).contains(atmB))
443 IAtom centralAtm = atmA;
447 ArrayList<IAtom> candidatesForHapto =
new ArrayList<IAtom>();
448 for (
MatchedBond tbForHapto : matchingbonds.get(ruleName))
451 if (tbForHapto.getAtmSubClass0() == centralAtm)
452 candidatesForHapto.add(tbForHapto.getAtmSubClass1());
457 Set<IAtom> atmsInHapto =
new HashSet<IAtom>();
458 atmsInHapto.add(tb.getAtmSubClass1());
460 centralAtm, candidatesForHapto, fragsMol);
461 if (atmsInHapto.size() == 1)
463 logger.log(Level.WARNING,
"Unable to find more than one "
464 +
"bond involved in high-hapticity ligand! "
470 boolean isSystemIntact =
true;
471 for (IAtom ligAtm : atmsInHapto)
473 List<IAtom> nbrsOfLigAtm =
474 fragsMol.getConnectedAtomsList(ligAtm);
475 if (!nbrsOfLigAtm.contains(centralAtm))
477 isSystemIntact =
false;
489 Point3d dummyP3d =
new Point3d();
490 for (IAtom ligAtm : atmsInHapto)
493 dummyP3d.x = dummyP3d.x + ligP3d.x;
494 dummyP3d.y = dummyP3d.y + ligP3d.y;
495 dummyP3d.z = dummyP3d.z + ligP3d.z;
498 dummyP3d.x = dummyP3d.x / (double) atmsInHapto.size();
499 dummyP3d.y = dummyP3d.y / (double) atmsInHapto.size();
500 dummyP3d.z = dummyP3d.z / (double) atmsInHapto.size();
504 IAtom dummyAtm =
null;
505 for (IAtom oldDu : fragsMol.atoms())
510 Point3d oldDuP3d = oldDu.getPoint3d();
511 if (oldDuP3d.distance(dummyP3d) < 0.002)
522 dummyAtm.setPoint3d(dummyP3d);
523 fragsMol.addAtom(dummyAtm);
529 IBond.Order border = IBond.Order.valueOf(
"SINGLE");
531 for (IAtom ligAtm : atmsInHapto)
533 List<IAtom> nbrsOfDu = fragsMol.getConnectedAtomsList(
535 if (!nbrsOfDu.contains(ligAtm))
538 Bond bnd =
new Bond(dummyAtm,ligAtm,border);
539 fragsMol.addBond(bnd);
542 IBond oldBnd = fragsMol.getBond(centralAtm,ligAtm);
543 fragsMol.removeBond(oldBnd);
560 IBond bnd = fragsMol.getBond(atmA,atmB);
561 fragsMol.removeBond(bnd);
578 ArrayList<Vertex> fragments =
new ArrayList<Vertex>();
579 Set<Integer> doneAlready =
new HashSet<Integer>();
582 if (doneAlready.contains(idx))
588 atmsToKeep.stream().forEach(atm -> doneAlready.add(iac.indexOf(atm)));
590 Set<IAtom> atmsToRemove =
new HashSet<IAtom>();
591 for (IAtom atm : cloneOfMaster.
atoms())
593 if (!atmsToKeep.contains(atm))
595 atmsToRemove.add(atm);
600 fragments.add(cloneOfMaster);
621 ArrayList<IAtom> candidates, IAtomContainer mol)
623 Set<IAtom> atmsInHapto =
new HashSet<IAtom>();
624 atmsInHapto.add(seed);
625 ArrayList<IAtom> toVisitAtoms =
new ArrayList<IAtom>();
626 toVisitAtoms.add(seed);
627 ArrayList<IAtom> visitedAtoms =
new ArrayList<IAtom>();
628 while (toVisitAtoms.size()>0)
630 ArrayList<IAtom> toVisitLater =
new ArrayList<IAtom>();
631 for (IAtom atomInFocus : toVisitAtoms)
633 if (visitedAtoms.contains(atomInFocus)
634 || atomInFocus==centralAtom)
637 visitedAtoms.add(atomInFocus);
639 if (candidates.contains(atomInFocus))
641 atmsInHapto.add(atomInFocus);
642 toVisitLater.addAll(mol.getConnectedAtomsList(atomInFocus));
645 toVisitAtoms.clear();
646 toVisitAtoms.addAll(toVisitLater);
662 Set<IAtom> atmsReachableFromSeed =
new HashSet<IAtom>();
663 ArrayList<IAtom> toVisitAtoms =
new ArrayList<IAtom>();
664 toVisitAtoms.add(seed);
665 ArrayList<IAtom> visitedAtoms =
new ArrayList<IAtom>();
666 while (toVisitAtoms.size()>0)
668 ArrayList<IAtom> toVisitLater =
new ArrayList<IAtom>();
669 for (IAtom atomInFocus : toVisitAtoms)
671 if (visitedAtoms.contains(atomInFocus))
674 visitedAtoms.add(atomInFocus);
676 atmsReachableFromSeed.add(atomInFocus);
677 toVisitLater.addAll(mol.getConnectedAtomsList(atomInFocus));
679 toVisitAtoms.clear();
680 toVisitAtoms.addAll(toVisitLater);
682 return atmsReachableFromSeed;
696 IAtomContainer mol, List<CuttingRule> rules, Logger logger)
699 Map<String,String> smarts =
new HashMap<String,String>();
702 smarts.put(rule.getName(),rule.getWholeSMARTSRule());
706 Map<String, List<MatchedBond>> bondsMatchingRules =
707 new HashMap<String, List<MatchedBond>>();
711 if (msq.hasProblems())
715 logger.log(Level.WARNING,
"Problem matching SMARTS: "
718 return bondsMatchingRules;
723 String ruleName = rule.getName();
725 if (msq.getNumMatchesOfQuery(ruleName) == 0)
731 Mappings purgedPairs = msq.getMatchesOfSMARTS(ruleName);
734 ArrayList<MatchedBond> bondsMatched =
new ArrayList<MatchedBond>();
735 for (
int[] pair : purgedPairs)
739 throw new Error(
"Cutting rule: " + ruleName
740 +
" has identified " + pair.length +
" atoms "
741 +
"instead of 2. Modify rule to make it find a "
745 mol.getAtom(pair[1]), rule);
749 bondsMatched.add(tb);
752 if (!bondsMatched.isEmpty())
753 bondsMatchingRules.put(ruleName, bondsMatched);
756 return bondsMatchingRules;
771 FileInputStream fis =
new FileInputStream(input);
772 IteratingSDFReader reader =
new IteratingSDFReader(fis,
773 DefaultChemObjectBuilder.getInstance());
776 int maxBufferSize = 2000;
777 ArrayList<Vertex> buffer =
new ArrayList<Vertex>(500);
779 while (reader.hasNext())
784 logger.log(Level.FINE,
"Processing fragment " + index);
791 if (buffer.size() >= maxBufferSize)
801 if (buffer.size() < maxBufferSize)
830 List<Vertex> collector, Logger logger)
841 if (settings.getIgnorableFragments().size() > 0)
843 if (settings.getIgnorableFragments().stream()
844 .anyMatch(ignorable -> ((
Fragment)frag)
845 .isIsomorphicTo(ignorable)))
849 logger.log(Level.FINE,
"Fragment " + fragCounter
857 if (settings.getTargetFragments().size() > 0)
859 if (!settings.getTargetFragments().stream()
860 .anyMatch(ignorable -> ((
Fragment)frag)
861 .isIsomorphicTo(ignorable)))
865 logger.log(Level.FINE,
"Fragment " + fragCounter
866 +
" doesn't match any target: rejected.");
874 && settings.doAddDuOnLinearity())
877 settings.getLinearAngleLimit());
884 if (settings.doManageIsomorphicFamilies())
886 synchronized (settings.MANAGEMWSLOTSSLOCK)
889 settings.getMWSlotSize());
891 File mwFileUnq = settings.getMWSlotFileNameUnqFrags(
893 File mwFileAll = settings.getMWSlotFileNameAllFrags(
898 if (mwFileUnq.exists())
900 ArrayList<Vertex> knownFrags =
902 unqVersion = knownFrags.stream()
904 ((
Fragment)frag).isIsomorphicTo(knownFrag))
908 if (unqVersion!=
null)
917 int sampleSize = settings.getIsomorphsCount()
919 if (sampleSize < settings.getIsomorphicSampleSize())
925 settings.getIsomorphsCount().put(isoFamID,
950 String isoFamID = settings.newIsomorphicFamilyID();
954 settings.getIsomorphsCount().put(isoFamID, 1);
1002 for (IAtom atm : frag.
atoms())
1013 logger.log(Level.FINE,
"Removing fragment contains non-element '"
1023 Point3d ap3d = ap.getDirectionVector();
1026 for (IAtom atm : frag.
atoms())
1029 double dist = ap3d.distance(atm3d);
1032 logger.log(Level.FINE,
"Removing fragment with AP"
1044 for (IAtom atm : frag.
atoms())
1049 if (atm.getMassNumber() ==
null)
1053 int a = atm.getMassNumber();
1055 IIsotope major = Isotopes.getInstance().getMajorIsotope(symb);
1056 if (a != major.getMassNumber())
1058 logger.log(Level.FINE,
"Removing fragment containing "
1059 +
"isotope "+symb+a+
".");
1062 }
catch (Throwable t) {
1063 logger.log(Level.WARNING,
"Not able to perform Isotope"
1075 for (IAtom atm : frag.
atoms())
1080 logger.log(Level.FINE,
"Removing fragment containing '"
1093 for (Map<String,Double> criterion :
1096 for (String el : criterion.keySet())
1098 if (eaMol.containsKey(el))
1101 if (eaMol.get(el) - criterion.get(el) > 0.5)
1103 logger.log(Level.FINE,
"Removing fragment that "
1104 +
"contains too much '" + el +
"' "
1105 +
"as requested by formula"
1106 +
"-based (more-than) settings (" + el
1107 + eaMol.get(el) +
" > " + criterion +
").");
1115 for (String el : criterion.keySet())
1117 if (!eaMol.containsKey(el))
1119 logger.log(Level.FINE,
"Removing fragment that does not "
1120 +
"contain '" + el +
"' as requested by formula"
1121 +
"-based (less-than) settings.");
1125 if (eaMol.get(el) - criterion.get(el) < -0.5)
1127 logger.log(Level.FINE,
"Removing fragment that "
1128 +
"contains too little '" + el +
"' "
1129 +
"as requested by formula"
1130 +
"-based settings (" + el
1131 + eaMol.get(el) +
" < " + criterion +
").");
1145 if (apc.toString().startsWith(s))
1147 logger.log(Level.FINE,
"Removing fragment with APClass "
1157 loopOverCombinations:
1160 for (
int ip=0; ip<conditions.length; ip++)
1162 String condition = conditions[ip];
1163 boolean found =
false;
1166 if (apc.toString().startsWith(condition))
1173 continue loopOverCombinations;
1179 String allCondsAsString =
"";
1180 for (
int i=0; i<conditions.length; i++)
1181 allCondsAsString = allCondsAsString +
" " + conditions[i];
1183 logger.log(Level.FINE,
"Removing fragment with combination of "
1184 +
"APClasses matching '" + allCondsAsString +
"'.");
1192 int totHeavyAtm = 0;
1193 for (IAtom atm : frag.
atoms())
1198 if ((!symb.equals(
"H")) && (!symb.equals(
1206 logger.log(Level.FINE,
"Removing fragment with too many atoms ("
1207 + totHeavyAtm +
" < "
1215 logger.log(Level.FINE,
"Removing fragment with too few atoms ("
1216 + totHeavyAtm +
" < "
1229 logger.log(Level.WARNING,
"Problems evaluating SMARTS-based "
1230 +
"rejection criteria. " + msq.
getMessage());
1237 logger.log(Level.FINE,
"Removing fragment that matches "
1238 +
"SMARTS-based rejection criteria '" + criterion
1251 logger.log(Level.WARNING,
"Problems evaluating SMARTS-based "
1252 +
"rejection criteria. " + msq.
getMessage());
1255 boolean matchesAny =
false;
1266 logger.log(Level.FINE,
"Removing fragment that does not "
1267 +
"match any SMARTS-based retention criteria.");
1287 if (a.getImplicitHydrogenCount()==
null)
1288 a.setImplicitHydrogenCount(0);
1291 int slotNum = (int) (mw / (Double.valueOf(slotSize)));
1292 return slotNum*slotSize +
"-" + (slotNum+1)*slotSize;
1300 IAtomContainer mol = SilentChemObjectBuilder.getInstance()
1301 .newAtomContainer();
1302 Point3d apv = ap.getDirectionVector();
1305 Double.valueOf(apv.x),
1306 Double.valueOf(apv.y),
1307 Double.valueOf(apv.z))));
1312 ap.getOwner().getIAtomContainer().getAtom(
1313 ap.getAtomPositionNumber()));
1314 rcv.
addAP(0, rcvApClass,
new Point3d(
1315 Double.valueOf(aps.x),
1316 Double.valueOf(aps.y),
1317 Double.valueOf(aps.z)));
General set of constants used in DENOPTIM.
static final Object FORMULASTR
Property name used to store molecular formula as string in an atom container.
static final String DUMMYATMSYMBOL
Symbol of dummy atom.
static final Object ISOMORPHICFAMILYID
Property used to store the identifier of the family of isomorphic fragments that owns a fragment.
An attachment point (AP) is a possibility to attach a Vertex onto the vertex holding the AP (i....
Class representing a continuously connected portion of chemical object holding attachment points.
void addAP(int atomPositionNumber)
Adds an attachment point with a dummy APClass.
AttachmentPoint addAPOnAtom(IAtom srcAtm, APClass apc, Point3d vector)
Add an attachment point to the specifies atom.
List< AttachmentPoint > getAttachmentPoints()
Fragment clone()
Returns a deep copy of this fragments.
Iterable< IAtom > atoms()
IAtomContainer getIAtomContainer()
void removeAtoms(Collection< IAtom > atoms)
Removes a list of atoms and updates the list of attachment points.
A vertex is a data structure that has an identity and holds a list of AttachmentPoints.
ArrayList< APClass > getAllAPClasses()
Returns the list of all APClasses present on this vertex.
Object getProperty(Object property)
abstract IAtomContainer getIAtomContainer()
void setProperty(Object key, Object property)
The RingClosingAttractor represent the available valence/connection that allows to close a ring.
static final HashMap< APClass, String > RCALABELPERAPCLASS
Conventional labels for attractor pseudoatom.
Utility methods for input/output.
static File writeVertexesToFile(File file, FileFormat format, List< Vertex > vertexes)
Writes vertexes to file.
static void writeSDFFile(String fileName, IAtomContainer mol)
Writes IAtomContainer to SDF file.
static File writeVertexToFile(File file, FileFormat format, Vertex vertex, boolean append)
Writes vertexes to file.
static ArrayList< Vertex > readVertexes(File file, Vertex.BBType bbt)
Reads Vertexes from any file that can contain such items.
An iterator that take IAtomContainers from a file, possibly using an available iterating reader,...
void close()
Close the memory-efficient iterator if any is open.
Logger getLogger()
Get the name of the program specific logger.
A cutting rule with three SMARTS queries (atom 1, bond, atom2) and options.
Parameters controlling execution of the fragmenter.
boolean doRejectWeirdIsotopes
Flag requesting to reject fragments with minor isotopes.
Map< String, Double > getRejectedFormulaLessThan()
int getMinFragHeavyAtomCount()
Set< String > getRejectedElements()
Map< String, String > getFragRetentionSMARTS()
int getMaxFragHeavyAtomCount()
Set< Map< String, Double > > getRejectedFormulaMoreThan()
Map< String, String > getFragRejectionSMARTS()
Set< String[]> getRejectedAPClassCombinations()
boolean addExplicitH
Flag requesting to add explicit H atoms.
Set< String > getRejectedAPClasses()
boolean acceptUnsetToSingeBO()
Boolean satisfiesRuleOptions
Flag indicating that we have checked the additional option from the cutting rule (otherwise this flag...
Toll to add/remove dummy atoms from linearities or multi-hapto sites.
static void addDummiesOnLinearities(Fragment frag, double angLim)
Append dummy atoms on otherwise linear arrangements of atoms.
Container of lists of atoms matching a list of SMARTS.
Map< String, Mappings > getAllMatches()
int getNumMatchesOfQuery(String query)
Utilities for molecule conversion.
static void setZeroImplicitHydrogensToAllAtoms(IAtomContainer iac)
Sets zero implicit hydrogen count to all atoms.
static int getDimensions(IAtomContainer mol)
Determines the dimensionality of the given chemical object.
static String getSymbolOrLabel(IAtom atm)
Gets either the elemental symbol (for standard atoms) of the label (for pseudo-atoms).
static void ensureNoUnsetBondOrders(IAtomContainer iac)
Sets bond order = single to all otherwise unset bonds.
static void explicitHydrogens(IAtomContainer mol)
Converts all the implicit hydrogens to explicit.
static Point3d getPoint3d(IAtom atm)
Return the 3D coordinates, if present.
static boolean isElement(IAtom atom)
Check element symbol corresponds to real element of Periodic Table.
The type of building block.
Identifier of the type of parameters.
FRG_PARAMS
Parameters controlling the fragmenter.