$darkmode
DENOPTIM
FormulaUtils.java
Go to the documentation of this file.
1package denoptim.utils;
2
3import java.util.ArrayList;
4import java.util.Arrays;
5import java.util.HashMap;
6import java.util.HashSet;
7import java.util.List;
8import java.util.Map;
9import java.util.Set;
10import java.util.logging.Level;
11import java.util.logging.Logger;
12
13import org.openscience.cdk.Isotope;
14import org.openscience.cdk.formula.MolecularFormula;
15import org.openscience.cdk.interfaces.IAtom;
16import org.openscience.cdk.interfaces.IAtomContainer;
17import org.openscience.cdk.interfaces.IIsotope;
18import org.openscience.cdk.tools.manipulator.MolecularFormulaManipulator;
19
20import denoptim.exception.DENOPTIMException;
21import denoptim.io.DenoptimIO;
22
28public class FormulaUtils
29{
30
31//------------------------------------------------------------------------------
32
38 public static Map<String,Double> parseFormula(String formula)
39 {
40 Map<String,Double> elementsMap = new HashMap<String,Double>();
41 String[] words = formula.split("\\s+");
42 for (int i=0; i<words.length; i++)
43 {
44 String elSymbol = "";
45 Double elCount = 0.0;
46 int characterIdx = 0;
47 while (characterIdx<words[i].length()
48 && Character.isLetter(words[i].charAt(characterIdx)))
49 {
50 characterIdx++;
51 }
52 elSymbol = words[i].substring(0,characterIdx);
53 if (characterIdx<words[i].length())
54 {
55 elCount = Double.parseDouble(words[i].substring(characterIdx));
56 } else {
57 elCount = 1.0;
58 }
59 elementsMap.put(elSymbol, elCount);
60 }
61 return elementsMap;
62 }
63
64//------------------------------------------------------------------------------
65
81 public static Map<String, ArrayList<Double>> parseCSDFormula(String formula)
83 {
84 //Parameter: maximum number of variable molecules
85 int limSF = 6;
86
87 //Element counts over the list of molecules in the formula
88 ArrayList<Map<String,Integer>> allAtmCounts =
89 new ArrayList<Map<String,Integer>>();
90
91 String[] mols = formula.split(",");
92
93 //Deal with stoichiometric factors
94 List<Double> stocFact = new ArrayList<Double>();
95 List<Boolean> tuneStFact = new ArrayList<Boolean>();
96 List<Boolean> tuneStFactToInt = new ArrayList<Boolean>();
97 for (int i=0; i < mols.length; i++)
98 {
99 stocFact.add(i,1.0);
100 tuneStFact.add(i,false);
101 tuneStFactToInt.add(i,false);
102 String locForm = mols[i].trim();
103 boolean found = false;
104 //Check for stoichiometric factor
105 if (locForm.contains("("))
106 {
107 //There is a stoichiometric factor
108 if (locForm.lastIndexOf(")") == (locForm.length()-1))
109 {
110 // the Stoc. Fact. is at the beginning => nothing at the end!
111 String[] spt = locForm.split("[()]");
112 if (spt[0].contains("n"))
113 {
114 //Tunable Stoic. Fact.
115 if (spt[0].lastIndexOf("n") == 0)
116 {
117 //n-case 0: __n(formula)__
118 tuneStFact.set(i,true);
119 locForm = spt[1];
120 found = true;
121 } else if (spt[0].lastIndexOf("n") ==
122 (spt[0].length()-1))
123 {
124 //n-case 1: __NUMn(formula)__
125 stocFact.set(i,Double.parseDouble(
126 spt[0].substring(0,spt[0].length() -1)));
127 tuneStFact.set(i,true);
128 locForm = spt[1];
129 found = true;
130 }
131 } else if (spt[0].contains("x"))
132 {
133 //Tunable Stoic. Fact. using x as variable
134 if (spt[0].lastIndexOf("x") == 0)
135 {
136 //n-case 0: __x(formula)__
137 tuneStFact.set(i,true);
138 locForm = spt[1];
139 found = true;
140 } else if (spt[0].lastIndexOf("x")
141 == (spt[0].length()-1))
142 {
143 //n-case 1: __NUMx(formula)__
144 stocFact.set(i,Double.parseDouble(
145 spt[0].substring(0,spt[0].length() -1)));
146 tuneStFact.set(i,true);
147 locForm = spt[1];
148 found = true;
149 }
150 } else {
151 stocFact.set(i,Double.parseDouble(spt[0]));
152 locForm = spt[1];
153 found = true;
154 }
155 } else if (locForm.lastIndexOf("(") == 0)
156 {
157 //Stoic. Fact. at the end
158 String[] sptR = locForm.split("[()]");
159 locForm = sptR[1];
160
161 //n-case 2: __(formula)n__
162 if (sptR[2].length() == 1)
163 {
164 tuneStFact.set(i,true);
165 found = true;
166 } else if (sptR[2].lastIndexOf("x")
167 == (sptR[0].length()-1)) {
168 //n-case 1: __(formula)NUMx__
169 stocFact.set(i,Double.parseDouble(
170 sptR[2].substring(0,sptR[2].length() -1)));
171 tuneStFact.set(i,true);
172 locForm = sptR[1];
173 found = true;
174 }
175
176 }
177
178 if (!found)
179 {
180 throw new DENOPTIMException("Moleculer formula '" +
181 formula + "' has a syntax that is not "
182 + "recognized.");
183 }
184
185 //Deal with fractionary Stoich. Fact.
186 double mod = stocFact.get(i) % 1.0;
187 if (mod != 0.0)
188 tuneStFactToInt.set(i,true);
189
190 } //end of stochiometric factor analysis
191
192 //Identify elements to be counted
193 Map<String,Integer> locCount = new HashMap<String,Integer>();
194 String [] lmnts = locForm.split("\\s+");
195 for (int l = 0; l < lmnts.length; l++)
196 {
197 //Get rid of the charge, if any
198 if (lmnts[l].endsWith("+") || lmnts[l].endsWith("-"))
199 continue;
200
201 //Count of a single element
202 String elSymbol = "";
203 int elCount = 0;
204 int fd = 0;
205 while (fd<lmnts[l].length()
206 && Character.isLetter(lmnts[l].charAt(fd)))
207 {
208 fd++;
209 }
210 elSymbol = lmnts[l].substring(0,fd);
211 if (fd<lmnts[l].length())
212 {
213 elCount = Integer.parseInt(lmnts[l].substring(fd));
214 } else {
215 elCount = 1;
216 }
217
218 //add this element and its count to the map
219 locCount.put(elSymbol,elCount);
220 }
221 //Move the atoms count to the general storage
222 allAtmCounts.add(i,locCount);
223
224 } //end of loop over mols
225
226 //Find largest molecule (from formula weight)
227 double largestMass = 0.0;
228 for (int i = 0; i < mols.length; i++)
229 {
230 MolecularFormula molForm = new MolecularFormula();
231 for (String el : allAtmCounts.get(i).keySet())
232 {
233 IIsotope is = new Isotope(el);
234 molForm.addIsotope(is,allAtmCounts.get(i).get(el));
235 }
236 double mass = MolecularFormulaManipulator.getMass(molForm);
237 if (mass > largestMass)
238 {
239 largestMass = mass;
240 }
241 }
242
243 //Get all the elements
244 Set<String> allEl = new HashSet<String>();
245 for (int i = 0; i < mols.length; i++)
246 for (String el : allAtmCounts.get(i).keySet())
247 allEl.add(el);
248
249 //report counting for all SINGLE molecules
250 Map<String,ArrayList<Double>> elemAnalFormula =
251 new HashMap<String,ArrayList<Double>>();
252 for (int i = 0; i < mols.length; i++)
253 {
254 for (String el : allEl)
255 {
256 double num = 0.0;
257 if (allAtmCounts.get(i).containsKey(el))
258 {
259 num = allAtmCounts.get(i).get(el);
260 }
261 if (!elemAnalFormula.containsKey(el))
262 elemAnalFormula.put(el,new ArrayList<Double>(Arrays.asList(num)));
263 else
264 elemAnalFormula.get(el).add(num);
265 }
266 }
267
268 //report counting for all SINGLE unit [NUM(molecule)]
269 for (int i = 0; i < mols.length; i++)
270 {
271 for (String el : allEl)
272 {
273 double num = 0.0;
274 if (allAtmCounts.get(i).containsKey(el))
275 {
276 double pf = 1.0;
277 if (tuneStFactToInt.get(i))
278 pf = pf * (1.0 / stocFact.get(i));
279 num = allAtmCounts.get(i).get(el) * pf * stocFact.get(i);
280 }
281 if (!elemAnalFormula.containsKey(el))
282 elemAnalFormula.put(el,new ArrayList<Double>(
283 Arrays.asList(num)));
284 else
285 elemAnalFormula.get(el).add(num);
286 }
287 }
288
289 //report counting for sums of the first n molecules up to all molecules
290 if (mols.length > 1)
291 {
292 for (int n = 2; n <= mols.length; n++)
293 {
294 //check for tunable stoich. factors in all molecules
295 //(assuming ONLY ONE factor has to be tuned)
296 boolean doTuning = true;
297 for (int i = 0; i < mols.length; i++)
298 {
299 if (!tuneStFact.get(i))
300 {
301 doTuning = false;
302 }
303 }
304 //Calculate number of atoms of per each element
305 for (String el : allEl)
306 {
307 double thisElCount = 0.0;
308 //sums of the first n molecules Ignoring stoichiometric factors
309 for (int i = 0; i < n; i++)
310 {
311 if (allAtmCounts.get(i).containsKey(el))
312 {
313 double pf = 1.0;
314 if (tuneStFactToInt.get(i))
315 pf = pf * (1.0 / stocFact.get(i));
316 thisElCount = thisElCount + allAtmCounts.get(i).get(el) * pf * stocFact.get(i);
317 }
318 }
319 elemAnalFormula.get(el).add(thisElCount);
320
321 //tune stoichiometric factors if required
322 if (doTuning)
323 {
324 //per each prefactor within the limits of the tuning procedure
325 for (int pf = 2; pf < limSF ; pf ++)
326 {
327 double pfd = pf;
328 double thisElCountTune = 0.0;
329 //sums of the first n molecules with stoichiometric factors
330 for (int i = 0; i < n; i++)
331 {
332 if (allAtmCounts.get(i).containsKey(el))
333 {
334 thisElCountTune = thisElCountTune + allAtmCounts.get(i).get(el) * pfd * stocFact.get(i);
335 }
336 }
337 elemAnalFormula.get(el).add(thisElCountTune);
338 }
339 }
340 }
341 }
342 }
343
344 return elemAnalFormula;
345 }
346
347//------------------------------------------------------------------------------
348
363 public static boolean compareFormulaAndElementalAnalysis(String formula,
364 IAtomContainer mol) throws DENOPTIMException
365 {
366 return compareFormulaAndElementalAnalysis(formula, mol, null);
367 }
368//------------------------------------------------------------------------------
369
385 public static boolean compareFormulaAndElementalAnalysis(String formula,
386 IAtomContainer mol, Logger logger) throws DENOPTIMException
387 {
388 // Elemental analysis of molecular formula (with possible variations)
389 Map<String,ArrayList<Double>> elemAnalFormula =
391 if (logger!=null && logger.getLevel() == Level.FINEST)
392 {
393 StringBuilder sb = new StringBuilder();
394 for (String el : elemAnalFormula.keySet())
395 sb.append(DenoptimIO.NL).append(el+" "+elemAnalFormula.get(el));
396 logger.log(Level.FINEST,"Elemental analysis from formula: "
397 + sb.toString());
398 }
399
400 // Elemental analysis on structure
401 Map<String,Double> elemAnalMolInfo = getElementalanalysis(mol);
402 if (logger!=null && logger.getLevel() == Level.FINEST)
403 {
404 StringBuilder sb = new StringBuilder();
405 for (String el : elemAnalMolInfo.keySet())
406 sb.append(DenoptimIO.NL).append(el+" "+elemAnalMolInfo.get(el));
407 logger.log(Level.FINEST,"Elemental analysis from atom structure: "
408 + sb.toString());
409 }
410
411 // Compare the two elemental analysis.
412
413 // First, get the number of candidate guesses from the molecule.
414 // Each guess is the
415 // result of including/excluding one/more isolated molecules
416 // or using a different tunable stochiometric factor.
417 int numCandidates = 0;
418 for (String el : elemAnalFormula.keySet())
419 {
420 numCandidates = elemAnalFormula.get(el).size();
421 break; // the size is equal for every value.
422 }
423
424 boolean foundMatch = false;
425 loopOverCandidate:
426 for (int i=0; i<numCandidates; i++)
427 {
428 for (String el : elemAnalFormula.keySet())
429 {
430 if (elemAnalFormula.get(el).get(i)>0.01
431 && !elemAnalMolInfo.containsKey(el))
432 {
433 continue loopOverCandidate;
434 }
435 if (elemAnalMolInfo.containsKey(el))
436 {
437 if (Math.abs(
438 elemAnalFormula.get(el).get(i)
439 - elemAnalMolInfo.get(el))
440 > 0.1)
441 {
442 continue loopOverCandidate;
443 }
444 } else {
445 if (Math.abs(elemAnalFormula.get(el).get(i) - 0.0)
446 > 0.1)
447 {
448 continue loopOverCandidate;
449 }
450 }
451 }
452 foundMatch = true;
453 break;
454 }
455 return foundMatch;
456 }
457
458//------------------------------------------------------------------------------
459
465 public static Map<String,Double> getElementalanalysis(IAtomContainer mol)
466 {
467 Map<String,Double> elemAnalMolInfo = new HashMap<String,Double>();
468 for (IAtom atm : mol.atoms())
469 {
470 String elSymbol = atm.getSymbol();
471 //Deal with deuterium symbol
472 if (atm.getMassNumber()!=null && atm.getMassNumber() == 2)
473 elSymbol = "D";
474 if (elemAnalMolInfo.keySet().contains(elSymbol))
475 {
476 double num = elemAnalMolInfo.get(elSymbol) + 1.0;
477 elemAnalMolInfo.put(elSymbol,num);
478 } else {
479 elemAnalMolInfo.put(elSymbol,1.0);
480 }
481 }
482 return elemAnalMolInfo;
483 }
484
485//------------------------------------------------------------------------------
486
487}
Utility methods for input/output.
static final String NL
Newline character from system.
Utilities for manipulating molecular formulas.
static boolean compareFormulaAndElementalAnalysis(String formula, IAtomContainer mol)
Compares the molecular formula formatted as from the Cambridge Structural Database (CSD) against the ...
static Map< String, ArrayList< Double > > parseCSDFormula(String formula)
Reads a molecular formula formatted as from the Cambridge Structural Database and produces a profile ...
static Map< String, Double > parseFormula(String formula)
Reads a molecular formula written as "H2 O" or "C6 H12 O6".
static Map< String, Double > getElementalanalysis(IAtomContainer mol)
Treats deuterium as a different element than hydrogen.
static boolean compareFormulaAndElementalAnalysis(String formula, IAtomContainer mol, Logger logger)
Compares the molecular formula formatted as from the Cambridge Structural Database (CSD) against the ...