$darkmode
DENOPTIM
TopoTemplateProducer.java
Go to the documentation of this file.
1package denoptim.fragmenter;
2
3import java.util.ArrayList;
4import java.util.HashMap;
5import java.util.HashSet;
6import java.util.List;
7import java.util.Map;
8import java.util.Set;
9import java.util.logging.Logger;
10
11import org.openscience.cdk.interfaces.IAtom;
12import org.openscience.cdk.interfaces.IAtomContainer;
13import org.openscience.cdk.interfaces.IBond;
14import org.openscience.cdk.silent.SilentChemObjectBuilder;
15
16import denoptim.constants.DENOPTIMConstants;
17import denoptim.utils.DummyAtomHandler;
18import denoptim.utils.MoleculeUtils;
19
21{
22
/**
 * Topology-critical atoms. Filled by initialize() and used by
 * getTemplateWithBufferShell(int) as the seed of the atoms to keep.
 */
26 Set<IAtom> topoCriticalAtoms = new HashSet<>();
27
/**
 * The original IAtomContainer to produce a topology-critical template for.
 */
31 IAtomContainer originalIAC;
32
/**
 * Flag recording whether we could only produce a H-depleted template.
 */
36 boolean produceHDepleted = false;
37
38//-----------------------------------------------------------------------------
39
/**
 * Constructor. Stores the given container and immediately analyses it to
 * identify the topology-critical atoms.
 *
 * @param originalIAC the original IAtomContainer to produce a
 * topology-critical template for. Must not be <code>null</code>.
 * @throws NullPointerException if <code>originalIAC</code> is
 * <code>null</code>.
 */
public TopoTemplateProducer(IAtomContainer originalIAC)
{
    // Fail fast with a clear message instead of failing later, inside
    // initialize(), when the atoms of a null container are requested.
    this.originalIAC = java.util.Objects.requireNonNull(originalIAC,
            "originalIAC must not be null");
    initialize();
}
50
51//-----------------------------------------------------------------------------
52
/**
 * Analyses {@link #originalIAC} to fill {@link #topoCriticalAtoms} and to
 * set {@link #produceHDepleted}. Called once, from the constructor.
 * <p>
 * A first pass over the atoms reads the vertex ID property of each atom,
 * records the atoms carrying the attachment point property, and flags as
 * boundary atoms the bonded atoms that have different vertex IDs.
 * Then one of two strategies selects the topology-critical atoms: a
 * simplified selection that drops hydrogen and dummy atoms not carrying
 * attachment points, or the atoms found on the shortest paths between
 * every pair of boundary atoms.
 * </p>
 */
53 private void initialize()
54 {
55 // Find all boundary atoms: atoms connected to atoms with different vertex IDs
56 Set<IAtom> boundaryAtoms = new HashSet<>();
57 Map<IAtom, Long> atomToVertexId = new HashMap<>();
58 Set<Long> visitedVertexIds = new HashSet<>();
59 Set<Long> visitedVertexIdsBoundaries = new HashSet<>();
60 Set<IAtom> atomsWithAPs = new HashSet<>();
61
62 // Collect vertex IDs and identify boundary atoms
63 for (IAtom atom : originalIAC.atoms())
64 {
65 Object vidProp = atom.getProperty(DENOPTIMConstants.ATMPROPVERTEXID);
66 if (vidProp == null)
67 {
68 // If no vertex ID, keep all atoms (can't optimize)
69 produceHDepleted = true;
// Placeholder ID, so that the parsing below still succeeds
70 vidProp = -1L;
71 }
72 Long vid = Long.parseLong(vidProp.toString());
73 visitedVertexIds.add(vid);
74 atomToVertexId.put(atom, vid);
75
// Remember which atoms carry the attachment point property
76 Object apsProp = atom.getProperty(DENOPTIMConstants.ATMPROPAPS);
77 if (apsProp != null)
78 {
79 atomsWithAPs.add(atom);
80 }
81
82 // Check neighbors for different vertex IDs
83 List<IAtom> neighbors = originalIAC.getConnectedAtomsList(atom);
84 for (IAtom neighbor : neighbors)
85 {
86 Object nbrVidProp = neighbor.getProperty(DENOPTIMConstants.ATMPROPVERTEXID);
// Neighbors without a vertex ID are ignored here
87 if (nbrVidProp != null)
88 {
89 Long nbrVid = Long.parseLong(nbrVidProp.toString());
90 if (!vid.equals(nbrVid))
91 {
92 boundaryAtoms.add(atom);
93 boundaryAtoms.add(neighbor);
94 visitedVertexIdsBoundaries.add(vid);
95 visitedVertexIdsBoundaries.add(nbrVid);
// One neighbor with a different vertex ID is enough to flag
// this atom: stop scanning its remaining neighbors
96 break;
97 }
98 }
99 }
100 }
101
102 // NB: if we have a single vertex in the template, then there are no boundary atoms!
103 // We'll go forward with the H-depleted version.
104
105 // Ensure we have visited all vertexes, or H-depleted version is needed
// After the removal, only vertex IDs that never appeared at a boundary are left
106 visitedVertexIds.removeAll(visitedVertexIdsBoundaries);
107 if (!visitedVertexIds.isEmpty())
108 {
109 produceHDepleted = true;
110 }
111
112 // If no boundary atoms found, all atoms have the same vertex ID,
113 // fall back to using the H-depleted version
114 if (boundaryAtoms.isEmpty())
115 {
116 produceHDepleted = true;
117 }
118
// NOTE(review): this listing jumps from line 118 to line 120, so the
// condition guarding the block below is not visible here. Presumably it
// tests produceHDepleted -- confirm against the original source file.
120 {
121 // We did not manage to find a sensible subset possibly because the input
122 // is a disconnected graph, or a single-vertex graph.
123 // So, we make a simplified version removing all H and dummy atoms
124 for (IAtom atom : originalIAC.atoms())
125 {
126 // Dummy atoms that are not AP sources are not considered
// NOTE(review): line 127 is missing from this listing. Presumably it holds
// the test identifying dummy atoms -- confirm against the original source.
128 {
129 if (atomsWithAPs.contains(atom))
130 {
// An atom kept here also reaches the hydrogen check below, where it is
// added again: harmless, because topoCriticalAtoms is a Set
131 topoCriticalAtoms.add(atom);
132 } else {
133 continue;
134 }
135 }
// Hydrogen atoms are kept only if they carry attachment points;
// any other atom reaching this point is kept
136 if (atom.getSymbol().equals("H"))
137 {
138 if (atomsWithAPs.contains(atom))
139 {
140 topoCriticalAtoms.add(atom);
141 }
142 } else {
143 topoCriticalAtoms.add(atom);
144 }
145 }
146 } else {
147 // Use the boundaries to identify the smallest set of atoms needed
148 // to identify matching topology.
149 // For each pair of boundary atoms, find shortest path
150 List<IAtom> boundaryList = new ArrayList<>(boundaryAtoms);
151 for (int i = 0; i < boundaryList.size(); i++)
152 {
// Start from i + 1 so that each unordered pair is processed only once
153 for (int j = i + 1; j < boundaryList.size(); j++)
154 {
155 IAtom start = boundaryList.get(i);
156 IAtom end = boundaryList.get(j);
157
158 // Find shortest path between these boundary atoms
159 //
160 // WARNING: this method can be very costly for large molecules.
161 //
162 List<IAtom> path = MoleculeUtils.findShortestPath(originalIAC,
163 start, end, atomToVertexId);
// A null path is skipped: that pair contributes no atoms
164 if (path != null)
165 {
166 topoCriticalAtoms.addAll(path);
167 }
168 }
169 }
170 }
171 }
172
173//-----------------------------------------------------------------------------
174
185 public IAtomContainer getTemplateWithBufferShell(int bufferShellSize)
186 {
187 IAtomContainer reduced = SilentChemObjectBuilder.getInstance().newAtomContainer();
188
189 // Define the atoms to keep considering topology-critical atoms and the buffer shell
190 Set<IAtom> atomsToKeep = new HashSet<>(topoCriticalAtoms);
191 Set<IAtom> thisLevelAtoms = new HashSet<>(topoCriticalAtoms);
192 for (int i = 0; i < bufferShellSize; i++)
193 {
194 if (atomsToKeep.size() == originalIAC.getAtomCount())
195 {
196 // No more atoms can be added
197 break;
198 }
199 Set<IAtom> nextLevelAtoms = new HashSet<>();
200 for (IAtom atm : thisLevelAtoms)
201 {
202 List<IAtom> neighbors = originalIAC.getConnectedAtomsList(atm);
203 for (IAtom neighbor : neighbors)
204 {
205 if (!atomsToKeep.contains(neighbor))
206 {
207 // Exclude any pseudo atom
208 if (!MoleculeUtils.isElement(neighbor))
209 {
210 continue;
211 }
212 atomsToKeep.add(neighbor);
213 nextLevelAtoms.add(neighbor);
214 }
215 }
216 }
217 thisLevelAtoms = nextLevelAtoms;
218 }
219
220 // Add atoms with original index stored as property
221 Map<IAtom, IAtom> originalToReduced = new HashMap<>(); // original atom -> reduced atom
222 for (IAtom originalAtom : atomsToKeep)
223 {
224 IAtom reducedAtom = originalAtom.getBuilder().newInstance(
225 IAtom.class, originalAtom);
226 reduced.addAtom(reducedAtom);
227 originalToReduced.put(originalAtom, reducedAtom);
228
229 // Store the original atom index as a property
230 int originalIndex = originalIAC.indexOf(originalAtom);
231 reducedAtom.setProperty("DENOPTIM_ORIGINAL_ATOM_INDEX", originalIndex);
232
233 // Copy all other properties
234 for (Object key : originalAtom.getProperties().keySet())
235 {
236 if (!key.equals("DENOPTIM_ORIGINAL_ATOM_INDEX"))
237 {
238 reducedAtom.setProperty(key, originalAtom.getProperty(key));
239 }
240 }
241 }
242
243 // Add bonds between kept atoms
244 for (IBond bond : originalIAC.bonds())
245 {
246 IAtom atom1 = bond.getAtom(0);
247 IAtom atom2 = bond.getAtom(1);
248
249 if (atomsToKeep.contains(atom1) && atomsToKeep.contains(atom2))
250 {
251 IBond newBond = bond.getBuilder().newInstance(IBond.class,
252 originalToReduced.get(atom1), originalToReduced.get(atom2), bond.getOrder());
253 reduced.addBond(newBond);
254 }
255 }
256
257 return reduced;
258 }
259
260//-----------------------------------------------------------------------------
261}
General set of constants used in DENOPTIM.
static final String ATMPROPAPS
String tag of Atom property used to store attachment points.
static final String ATMPROPVERTEXID
String tag of Atom property used to store the unique ID of the Vertex corresponding to the molecular ...
static final String DUMMYATMSYMBOL
Symbol of dummy atom.
TopoTemplateProducer(IAtomContainer originalIAC)
Constructor.
IAtomContainer originalIAC
The original IAtomContainer to produce a topology-critical template for.
boolean produceHDepleted
Flag recording whether we could only produce a H-depleted template.
Set< IAtom > topoCriticalAtoms
topology-critical atoms
IAtomContainer getTemplateWithBufferShell(int bufferShellSize)
Produces a new IAtomContainer containing all the atoms needed to define the topology of the original ...
Utilities for molecule conversion.
static String getSymbolOrLabel(IAtom atm)
Gets either the elemental symbol (for standard atoms) or the label (for pseudo-atoms).
static List< IAtom > findShortestPath(IAtomContainer mol, IAtom start, IAtom end, Map< IAtom, Long > atomToVertexId)
Finds the shortest path between two atoms in a molecule using BFS.
static boolean isElement(IAtom atom)
Check element symbol corresponds to real element of Periodic Table.