diff -Nru opsin-1.5.0/debian/changelog opsin-2.3.1/debian/changelog --- opsin-1.5.0/debian/changelog 2013-07-28 21:17:58.000000000 +0000 +++ opsin-2.3.1/debian/changelog 2017-12-20 09:35:56.000000000 +0000 @@ -1,6 +1,25 @@ -opsin (1.5.0-1) unstable; urgency=low +opsin (2.3.1-1) unstable; urgency=low + + * New upstream release. [ Daniel Leidert ] + * debian/watch: Fixed. + + [ Michael Banck ] + * debian/control (Vcs-Browser, Vcs-Svn): Updated. + * debian/control (Homepage): Updated. + * debian/upstream/metadata: New file. + * debian/control (Build-Depends): Added libwoodstox-java and + libstax2-api-java. + * debian/rules (CLASSPATH): Updated for libwoodstox-java and + libstax2-api-java. + * debian/javabuild, debian/libopsin-java.jlibs: Updated to new upstream + release. + + -- Michael Banck Wed, 20 Dec 2017 10:35:56 +0100 + +opsin (1.5.0-1) unstable; urgency=low + * New upstream release. -- Daniel Leidert Sun, 28 Jul 2013 23:17:55 +0200 diff -Nru opsin-1.5.0/debian/compat opsin-2.3.1/debian/compat --- opsin-1.5.0/debian/compat 2012-03-13 23:56:42.000000000 +0000 +++ opsin-2.3.1/debian/compat 2017-12-10 18:21:11.000000000 +0000 @@ -1 +1 @@ -7 +9 diff -Nru opsin-1.5.0/debian/control opsin-2.3.1/debian/control --- opsin-1.5.0/debian/control 2013-05-20 20:41:05.000000000 +0000 +++ opsin-2.3.1/debian/control 2017-12-19 11:00:39.000000000 +0000 @@ -4,7 +4,7 @@ Maintainer: Debichem Team Uploaders: Michael Banck Build-Depends: ant, - debhelper (>= 7.0.50~), + debhelper (>= 9), default-jdk, javahelper, junit4, @@ -13,11 +13,13 @@ libcommons-io-java, liblog4j1.2-java, libmockito-java, + libstax2-api-java, + libwoodstox-java, libxom-java -Standards-Version: 3.9.4 -Homepage: http://www-ucc.ch.cam.ac.uk/products/software/opsin -Vcs-Browser: http://svn.debian.org/wsvn/debichem/unstable/opsin/ -Vcs-Svn: svn://svn.debian.org/svn/debichem/unstable/opsin/ +Standards-Version: 4.1.0 +Homepage: http://opsin.ch.cam.ac.uk/ +Vcs-Browser: 
http://anonscm.debian.org/viewvc/debichem/unstable/opsin/ +Vcs-Svn: svn://anonscm.debian.org/debichem/unstable/opsin/ Package: libopsin-java Architecture: all diff -Nru opsin-1.5.0/debian/javabuild opsin-2.3.1/debian/javabuild --- opsin-1.5.0/debian/javabuild 2013-07-28 21:16:55.000000000 +0000 +++ opsin-2.3.1/debian/javabuild 2017-12-19 12:55:49.000000000 +0000 @@ -1 +1 @@ -opsin-1.5.0.jar opsin-core +opsin-2.3.1.jar opsin-core diff -Nru opsin-1.5.0/debian/libopsin-java.jlibs opsin-2.3.1/debian/libopsin-java.jlibs --- opsin-1.5.0/debian/libopsin-java.jlibs 2013-07-28 21:16:55.000000000 +0000 +++ opsin-2.3.1/debian/libopsin-java.jlibs 2017-12-19 11:04:03.000000000 +0000 @@ -1 +1 @@ -opsin-1.5.0.jar +opsin-2.3.1.jar diff -Nru opsin-1.5.0/debian/rules opsin-2.3.1/debian/rules --- opsin-1.5.0/debian/rules 2013-05-20 20:41:05.000000000 +0000 +++ opsin-2.3.1/debian/rules 2017-12-19 11:00:28.000000000 +0000 @@ -10,7 +10,7 @@ #export DH_VERBOSE=1 export JAVA_HOME := /usr/lib/jvm/default-java -export CLASSPATH := /usr/share/java/automaton.jar:/usr/share/java/xom.jar:/usr/share/java/commons-io.jar:/usr/share/java/commons-cli.jar:/usr/share/java/junit4.jar:/usr/share/java/log4j-1.2.jar:/usr/share/java/mockito-core.jar:/usr/share/java/jaxen.jar +export CLASSPATH := /usr/share/java/automaton.jar:/usr/share/java/xom.jar:/usr/share/java/commons-io.jar:/usr/share/java/commons-cli.jar:/usr/share/java/junit4.jar:/usr/share/java/log4j-1.2.jar:/usr/share/java/mockito-core.jar:/usr/share/java/jaxen.jar:/usr/share/java/woodstox-core-lgpl.jar:/usr/share/java/stax2-api.jar %: dh $@ --with javahelper --parallel diff -Nru opsin-1.5.0/debian/upstream/metadata opsin-2.3.1/debian/upstream/metadata --- opsin-1.5.0/debian/upstream/metadata 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/debian/upstream/metadata 2017-12-10 18:31:04.000000000 +0000 @@ -0,0 +1,10 @@ +Name: OPSIN +Homepage: http://opsin.ch.cam.ac.uk/ +Reference: + - Title: "Chemical Name to Structure: OPSIN, an Open Source 
Solution" + Author: D. M. Lowe and P. T. Corbett and P. Murray-Rust and R. C. Glen + Journal: J. Chem. Inf. Model. + Year: 2011 + Volume: 51 + Pages: 739-753 + DOI: 10.1021/ci100384d diff -Nru opsin-1.5.0/debian/watch opsin-2.3.1/debian/watch --- opsin-1.5.0/debian/watch 2013-05-20 20:46:17.000000000 +0000 +++ opsin-2.3.1/debian/watch 2017-12-10 18:21:11.000000000 +0000 @@ -1,2 +1,2 @@ version=3 -https://bitbucket.org/dan2097/opsin/downloads ^.*/get/([\d.]+)\.tar\.(?:bz2|gz)$ +https://bitbucket.org/dan2097/opsin/downloads?tab=tags ^.*/get/([\d.]+)\.tar\.(?:bz2|gz)$ diff -Nru opsin-1.5.0/.hg_archival.txt opsin-2.3.1/.hg_archival.txt --- opsin-1.5.0/.hg_archival.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/.hg_archival.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1,4 +1,4 @@ repo: e6e7d661ac0d4a52f1782ecd91caf02f437479f3 -node: 12cfaef8a36f1a8f82dd6eaef3af6ad8a44dc47c +node: 544558db8289554e6bd208e6b755964f3fad5ea5 branch: default -tag: 1.5.0 +tag: 2.3.1 diff -Nru opsin-1.5.0/.hgignore opsin-2.3.1/.hgignore --- opsin-1.5.0/.hgignore 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/.hgignore 2017-07-23 20:55:18.000000000 +0000 @@ -1,5 +1,5 @@ -relre:opsin-inchi/src/main/java/dl/* -relre:target/* -.classpath -.project -.settings +(^|/)target/ +opsin-inchi/src/main/java/dl/ +\.classpath +\.project +\.settings diff -Nru opsin-1.5.0/.hgtags opsin-2.3.1/.hgtags --- opsin-1.5.0/.hgtags 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/.hgtags 2017-07-23 20:55:18.000000000 +0000 @@ -8,3 +8,9 @@ bb5741be6c9707e89bcbecd4a50551ffd674a2e9 1.2.0 354121fd59845d60046430dd03e7a4c64e2bb94c 1.3.0 76ad2554bb162a428c0ce7d1edf9e25ad7b6664d 1.4.0 +12cfaef8a36f1a8f82dd6eaef3af6ad8a44dc47c 1.5.0 +56f2435d4f0a90ba916200ee3bc18d5534c2e850 1.6.0 +a9d50483841a75b05f4c47b47f61b593e443195f 2.0.0 +fc406d1e2fa8ff5625d0ca1f67f5066c2b2316f0 2.1.0 +eedc8ce4be56dc084d959f0a963e5de97f8a8af5 2.2.0 +32dc04cc34e44d0e8dac625aeb2f224d52e57c47 2.3.0 diff -Nru opsin-1.5.0/LICENSE-External.txt 
opsin-2.3.1/LICENSE-External.txt --- opsin-1.5.0/LICENSE-External.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/LICENSE-External.txt 2017-07-23 20:55:18.000000000 +0000 @@ -3,10 +3,10 @@ This distribution contains some content that is copyrighted by its respective owners and distributed under the indicated licenses. -Automaton (http://www.brics.dk/~amoeller/automaton) - content licensed under the BSD -License (see below) +Automaton (http://www.brics.dk/~amoeller/automaton) - content licensed under the BSD License +(see below) -XOM (http://www.xom.nu) - content licensed under the GNU Lesser General Public License (LGPL) +Woodstox (https://github.com/FasterXML/woodstox) - content licenced under the The Apache Software License, Version 2.0 (see below) log4j (http://logging.apache.org/log4j/) - content licenced under the The Apache Software License, Version 2.0 diff -Nru opsin-1.5.0/opsin-core/pom.xml opsin-2.3.1/opsin-core/pom.xml --- opsin-1.5.0/opsin-core/pom.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/pom.xml 2017-07-23 20:55:18.000000000 +0000 @@ -3,7 +3,7 @@ opsin uk.ac.cam.ch.opsin - 1.5.0 + 2.3.1 opsin-core OPSIN Core @@ -24,6 +24,22 @@ + + + src/main/resources + true + + **/*.props + + + + src/main/resources + false + + **/*.props + + + @@ -31,12 +47,12 @@ automaton - xom - xom + org.codehaus.woodstox + woodstox-core-asl - commons-io - commons-io + commons-io + commons-io commons-cli diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AmbiguityChecker.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AmbiguityChecker.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AmbiguityChecker.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AmbiguityChecker.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,219 @@ +package uk.ac.cam.ch.wwmm.opsin; + +import static 
uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.ELEMENTARYATOM_SUBTYPE_VAL; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +class AmbiguityChecker { + + static boolean isSubstitutionAmbiguous(List substitutableAtoms, int numberToBeSubstituted) { + if (substitutableAtoms.size() == 0) { + throw new IllegalArgumentException("OPSIN Bug: Must provide at least one substituable atom"); + } + if (substitutableAtoms.size() < numberToBeSubstituted) { + throw new IllegalArgumentException("OPSIN Bug: substitutableAtoms must be >= numberToBeSubstituted"); + } + if (substitutableAtoms.size() == numberToBeSubstituted){ + return false; + } + if (allAtomsConnectToDefaultInAtom(substitutableAtoms, numberToBeSubstituted)) { + return false; + } + Set uniqueAtoms = new HashSet(substitutableAtoms); + if (uniqueAtoms.size() == 1) { + return false; + } + if (allAtomsEquivalent(uniqueAtoms) && (numberToBeSubstituted == 1 || numberToBeSubstituted == substitutableAtoms.size() - 1)){ + return false; + } + return true; + } + + static boolean allAtomsEquivalent(Collection atoms) { + StereoAnalyser analyser = analyseRelevantAtomsAndBonds(atoms); + Set uniqueEnvironments = new HashSet(); + for (Atom a : atoms) { + uniqueEnvironments.add(getAtomEnviron(analyser, a)); + } + return uniqueEnvironments.size() == 1; + } + + static boolean allBondsEquivalent(Collection bonds) { + Set relevantAtoms = new HashSet(); + for (Bond b : bonds) { + relevantAtoms.add(b.getFromAtom()); + relevantAtoms.add(b.getToAtom()); + } + StereoAnalyser analyser = analyseRelevantAtomsAndBonds(relevantAtoms); + Set uniqueBonds = new HashSet(); + for (Bond b : bonds) { + uniqueBonds.add(bondToCanonicalEnvironString(analyser, b)); + } + return uniqueBonds.size() == 1; + } + + private static String 
bondToCanonicalEnvironString(StereoAnalyser analyser, Bond b) { + String s1 = getAtomEnviron(analyser, b.getFromAtom()); + String s2 = getAtomEnviron(analyser, b.getToAtom()); + if (s1.compareTo(s2) > 0){ + return s1 + s2; + } + else { + return s2 + s1; + } + } + + static String getAtomEnviron(StereoAnalyser analyser, Atom a) { + Integer env = analyser.getAtomEnvironmentNumber(a); + if (env == null) { + throw new RuntimeException("OPSIN Bug: Atom was not part of ambiguity analysis"); + } + //"identical" atoms may be distinguished by bonds yet to be formed, hence split by outvalency + // e.g. [PH3] vs [PH3]= + return env + "\t" + a.getOutValency(); + } + + private static boolean allAtomsConnectToDefaultInAtom(List substitutableAtoms, int numberToBeSubstituted) { + Atom defaultInAtom = substitutableAtoms.get(0).getFrag().getDefaultInAtom(); + if (defaultInAtom != null) { + for (int i = 0; i < numberToBeSubstituted; i++) { + if (!substitutableAtoms.get(i).equals(defaultInAtom)) { + return false; + } + } + return true; + } + return false; + } + + static StereoAnalyser analyseRelevantAtomsAndBonds(Collection startingAtoms) { + Set atoms = new HashSet(); + Set bonds = new HashSet(); + Deque stack = new ArrayDeque(startingAtoms); + while (!stack.isEmpty()) { + Atom a = stack.removeLast(); + if (!atoms.contains(a)) { + atoms.add(a); + for (Bond b : a.getBonds()) { + bonds.add(b); + stack.add(b.getOtherAtom(a)); + } + } + } + + List ghostHydrogens = new ArrayList(); + for (Atom atom : atoms) { + if (atom.getFrag().getSubType().equals(ELEMENTARYATOM_SUBTYPE_VAL)){//these do not have implicit hydrogen e.g. 
phosphorus is literally just a phosphorus atom + continue; + } + int explicitHydrogensToAdd = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(atom); + for (int i = 0; i < explicitHydrogensToAdd; i++) { + Atom ghostHydrogen = new Atom(ChemEl.H); + Bond b = new Bond(ghostHydrogen, atom, 1); + atom.addBond(b); + ghostHydrogen.addBond(b); + ghostHydrogens.add(ghostHydrogen); + } + } + atoms.addAll(ghostHydrogens); + StereoAnalyser analyzer = new StereoAnalyser(atoms, bonds); + for (Atom ghostHydrogen : ghostHydrogens) { + Bond b = ghostHydrogen.getFirstBond(); + b.getOtherAtom(ghostHydrogen).removeBond(b); + } + return analyzer; + } + + static List useAtomEnvironmentsToGivePlausibleSubstitution(List substitutableAtoms, int numberToBeSubstituted) { + if (substitutableAtoms.size() == 0) { + throw new IllegalArgumentException("OPSIN Bug: Must provide at least one substituable atom"); + } + if (substitutableAtoms.size() < numberToBeSubstituted) { + throw new IllegalArgumentException("OPSIN Bug: substitutableAtoms must be >= numberToBeSubstituted"); + } + if (substitutableAtoms.size() == numberToBeSubstituted){ + return substitutableAtoms; + } + + List preferredAtoms = findPlausibleSubstitutionPatternUsingSymmmetry(substitutableAtoms, numberToBeSubstituted); + if (preferredAtoms != null){ + return preferredAtoms; + } + return findPlausibleSubstitutionPatternUsingLocalEnvironment(substitutableAtoms, numberToBeSubstituted); + } + + private static List findPlausibleSubstitutionPatternUsingSymmmetry(List substitutableAtoms, int numberToBeSubstituted) { + //cf. 
octaethylporphyrin (8 identical atoms capable of substitution) + StereoAnalyser analyser = analyseRelevantAtomsAndBonds(new HashSet(substitutableAtoms)); + Map> atomsInEachEnvironment = new HashMap>(); + for (Atom a : substitutableAtoms) { + String env = getAtomEnviron(analyser, a); + List atomsInEnvironment = atomsInEachEnvironment.get(env); + if (atomsInEnvironment == null) { + atomsInEnvironment = new ArrayList(); + atomsInEachEnvironment.put(env, atomsInEnvironment); + } + atomsInEnvironment.add(a); + } + List preferredAtoms = null; + for (List atoms : atomsInEachEnvironment.values()) { + if (atoms.size() == numberToBeSubstituted){ + if (preferredAtoms != null){ + return null; + } + preferredAtoms = atoms; + } + } + if (preferredAtoms == null) { + //check for environments with double the required atoms where this means each atom can support two substitutions c.f. cyclohexane + for (List atoms : atomsInEachEnvironment.values()) { + if (atoms.size() == (numberToBeSubstituted * 2)){ + Set uniquified = new LinkedHashSet(atoms);//retain deterministic atom ordering + if (uniquified.size() == numberToBeSubstituted) { + if (preferredAtoms != null){ + return null; + } + preferredAtoms = new ArrayList(uniquified); + } + } + } + } + return preferredAtoms; + } + + private static List findPlausibleSubstitutionPatternUsingLocalEnvironment(List substitutableAtoms, int numberToBeSubstituted) { + //cf. 
pentachlorotoluene (5 sp2 carbons vs sp3 methyl) + Map> atomsInEachLocalEnvironment = new HashMap>(); + for (Atom a : substitutableAtoms) { + int valency = a.determineValency(true); + int currentValency = a.getIncomingValency() + a.getOutValency(); + int numOfBonds = (valency - currentValency) + a.getBondCount();//distinguish sp2 and sp3 atoms + String s = a.getElement().toString() +"\t" + valency + "\t" + numOfBonds + "\t" + a.hasSpareValency(); + List atomsInEnvironment = atomsInEachLocalEnvironment.get(s); + if (atomsInEnvironment == null) { + atomsInEnvironment = new ArrayList(); + atomsInEachLocalEnvironment.put(s, atomsInEnvironment); + } + atomsInEnvironment.add(a); + } + List preferredAtoms = null; + for (List atoms : atomsInEachLocalEnvironment.values()) { + if (atoms.size() == numberToBeSubstituted){ + if (preferredAtoms != null){ + return null; + } + preferredAtoms = atoms; + } + } + return preferredAtoms; + } +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AnnotatorState.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AnnotatorState.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AnnotatorState.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AnnotatorState.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,72 @@ +package uk.ac.cam.ch.wwmm.opsin; + + +/** + * Contains the state needed during finite-state parsing + * From this the tokens string and their semantics can be generated + * @author Daniel + * + */ +class AnnotatorState { + + /** The current state of the DFA. */ + private final int state; + /** The annotation so far. 
*/ + private final char annot; + + /** The index of the first char in the chemical name that has yet to be tokenised */ + private final int posInName; + + private final boolean isCaseSensitive; + + private final AnnotatorState previousAs; + + + AnnotatorState(int state, char annot, int posInName, boolean isCaseSensitive, AnnotatorState previousAs) { + this.state = state; + this.annot = annot; + this.posInName = posInName; + this.isCaseSensitive = isCaseSensitive; + this.previousAs = previousAs; + } + + /** + * The current state in the DFA + * @return + */ + int getState() { + return state; + } + + /** + * The annotation that was consumed to transition to this state + * @return + */ + char getAnnot() { + return annot; + } + + /** + * The index of the first char in the chemical name that has yet to be tokenised (at the point of creating this AnnotatorState) + * @return + */ + int getPosInName() { + return posInName; + } + + /** + * Where the corresponding token is case sensitive + * @return + */ + boolean isCaseSensitive() { + return isCaseSensitive; + } + + /** + * The last annotator state for the previous token (or null if this is the first) + * @return + */ + AnnotatorState getPreviousAs() { + return previousAs; + } +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Atom.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Atom.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Atom.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Atom.java 2017-07-23 20:55:18.000000000 +0000 @@ -8,9 +8,6 @@ import java.util.Set; import java.util.regex.Matcher; -import nu.xom.Attribute; -import nu.xom.Element; - import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; /** @@ -25,13 +22,13 @@ class Atom { /**The (unique over the molecule) ID of the atom.*/ - private int ID; + private final int id; - /**The atomic symbol of the atom. 
*/ - private String element; + /**The chemical element of the atom. */ + private ChemEl chemEl; /**The locants that pertain to the atom.*/ - private final List locants = new ArrayList(); + private final List locants = new ArrayList(2); /**The formal charge on the atom.*/ private int charge = 0; @@ -46,23 +43,29 @@ private AtomParity atomParity = null; /**The bonds that involve the atom*/ - private final List bonds = new ArrayList(); + private final List bonds = new ArrayList(4); /**A map between PropertyKey s as declared here and useful atom properties, usually relating to some kind of special case. */ @SuppressWarnings("rawtypes") private final Map properties = new HashMap(); /** A set of atoms that were equally plausible to perform functional replacement on */ - static final PropertyKey> AMBIGUOUS_ELEMENT_ASSIGNMENT = new PropertyKey>("ambiguousElementAssignment"); + static final PropertyKey> AMBIGUOUS_ELEMENT_ASSIGNMENT = new PropertyKey>("ambiguousElementAssignment"); + /** The atom class which will be output when serialised to SMILES. Useful for distinguishing attachment points */ + static final PropertyKey ATOM_CLASS = new PropertyKey("atomClass"); + /** Used on wildcard atoms to indicate their meaning */ + static final PropertyKey HOMOLOGY_GROUP = new PropertyKey("homologyGroup"); + /** Used on wildcard atoms to indicate that they are a position variation bond */ + static final PropertyKey> POSITION_VARIATION_BOND = new PropertyKey>("positionVariationBond"); /** The hydrogen count as set in the SMILES*/ - static final PropertyKey SMILES_HYDROGEN_COUNT = new PropertyKey("smilesHydrogenCount"); + static final PropertyKey SMILES_HYDROGEN_COUNT = new PropertyKey("smilesHydrogenCount"); /** The oxidation number as specified by Roman numerals in the name*/ - static final PropertyKey OXIDATION_NUMBER = new PropertyKey("oxidationNumber"); + static final PropertyKey OXIDATION_NUMBER = new PropertyKey("oxidationNumber"); /** Is this atom the carbon of an aldehyde? 
(however NOT formaldehyde)*/ - static final PropertyKey ISALDEHYDE = new PropertyKey("isAldehyde"); + static final PropertyKey ISALDEHYDE = new PropertyKey("isAldehyde"); /** Indicates that this atom is an anomeric atom in a cyclised carbohydrate*/ - static final PropertyKey ISANOMERIC = new PropertyKey("isAnomeric"); + static final PropertyKey ISANOMERIC = new PropertyKey("isAnomeric"); /** Transient integer used to indicate traversal of fragments*/ - static final PropertyKey VISITED = new PropertyKey("visited"); + static final PropertyKey VISITED = new PropertyKey("visited"); /**The fragment to which the atom belongs.*/ private Fragment frag; @@ -96,77 +99,36 @@ * Double bonds are only converted to spareValency if atom is in a ring * Some suffixes have different meanings if an atom is part of a ring or not c.g. cyclohexanal vs ethanal */ - private boolean atomIsInACycle =false; + private boolean atomIsInACycle = false; /** * Builds an Atom from scratch. * GENERALLY EXCEPT FOR TESTING SHOULD NOT BE CALLED EXCEPT FROM THE FRAGMANAGER - * @param ID The ID number, unique to the atom in the molecule being built - * @param element The atomic symbol of the chemical element + * @param id The ID number, unique to the atom in the molecule being built + * @param chemlEl The chemical element * @param frag the Fragment to contain the Atom */ - Atom(int ID, String element, Fragment frag) { - if (frag==null){ + Atom(int id, ChemEl chemlEl, Fragment frag) { + if (frag == null){ throw new IllegalArgumentException("Atom is not in a fragment!"); } - if (element==null){ + if (chemlEl == null){ throw new IllegalArgumentException("Atom does not have an element!"); } this.frag = frag; - this.ID = ID; - this.element = element; + this.id = id; + this.chemEl = chemlEl; this.type =frag.getType(); } /** Used to build a DUMMY atom. 
* Does not have an id/frag/type as would be expected for a proper atom - * @param element An identifier for this atom + * @param chemlEl The chemical element */ - Atom(String element){ - this.element = element; - } - - /**Produces a nu.xom.Element for a CML Atom tag, containing - * attributes for id, elementType and (if appropriate) formalCharge, isotopeNumber. - * Where applicable child elements are created for atomParity and locant labels - * - * @return nu.xom.Element for a CML Atom tag - */ - Element toCMLAtom() { - Element elem = new Element("atom", XmlDeclarations.CML_NAMESPACE); - elem.addAttribute(new Attribute("id", "a" + Integer.toString(ID))); - elem.addAttribute(new Attribute("elementType", element)); - if(charge != 0){ - elem.addAttribute(new Attribute("formalCharge", Integer.toString(charge))); - } - if(isotope != null){ - elem.addAttribute(new Attribute("isotopeNumber", Integer.toString(isotope))); - } - if (!element.equals("H")){ - int hydrogenCount =0; - List neighbours = this.getAtomNeighbours(); - for (Atom neighbour : neighbours) { - if (neighbour.getElement().equals("H")){ - hydrogenCount++; - } - } - if (hydrogenCount==0){//prevent adding of implicit hydrogen - elem.addAttribute(new Attribute("hydrogenCount", "0")); - } - } - if(atomParity != null){ - elem.appendChild(atomParity.toCML()); - } - for(String l : locants) { - Element locant = new Element("label", XmlDeclarations.CML_NAMESPACE); - locant.addAttribute(new Attribute("value", l)); - locant.addAttribute(new Attribute("dictRef", "cmlDict:locant" )); - elem.appendChild(locant); - } - return elem; + Atom(ChemEl chemlEl){ + this.chemEl = chemlEl; + this.id = 0; } - - /** * Uses the lambdaConventionValency or if that is not available @@ -182,29 +144,29 @@ */ int determineValency(boolean considerOutValency) { if (lambdaConventionValency != null){ - return lambdaConventionValency +protonsExplicitlyAddedOrRemoved; + return lambdaConventionValency + protonsExplicitlyAddedOrRemoved; } - int 
currentValency =getIncomingValency(); + int currentValency = getIncomingValency(); if (considerOutValency){ - currentValency+=outValency; + currentValency += outValency; } Integer calculatedMinValency = minimumValency == null ? null : minimumValency + protonsExplicitlyAddedOrRemoved; - if (charge ==0 || protonsExplicitlyAddedOrRemoved !=0){ - Integer defaultValency =ValencyChecker.getDefaultValency(element); - if (defaultValency !=null){ + if (charge ==0 || protonsExplicitlyAddedOrRemoved != 0){ + Integer defaultValency = ValencyChecker.getDefaultValency(chemEl); + if (defaultValency != null){ defaultValency += protonsExplicitlyAddedOrRemoved; if (currentValency <= defaultValency && (calculatedMinValency == null || defaultValency >= calculatedMinValency)){ return defaultValency; } } } - Integer[] possibleValencies =ValencyChecker.getPossibleValencies(element, charge); - if (possibleValencies!=null) { - if (calculatedMinValency!=null && calculatedMinValency >= currentValency){ + Integer[] possibleValencies = ValencyChecker.getPossibleValencies(chemEl, charge); + if (possibleValencies != null) { + if (calculatedMinValency != null && calculatedMinValency >= currentValency){ return calculatedMinValency; } for (Integer possibleValency : possibleValencies) { - if (calculatedMinValency!=null && possibleValency < calculatedMinValency){ + if (calculatedMinValency != null && possibleValency < calculatedMinValency){ continue; } if (currentValency <= possibleValency){ @@ -212,7 +174,7 @@ } } } - if (calculatedMinValency!=null && calculatedMinValency>= currentValency){ + if (calculatedMinValency != null && calculatedMinValency >= currentValency){ return calculatedMinValency; } else{ @@ -240,7 +202,7 @@ } void removeLocant(String locantToRemove) { - int locantArraySize =locants.size(); + int locantArraySize = locants.size(); for (int i = locantArraySize -1; i >=0 ; i--) { if (locants.get(i).equals(locantToRemove)){ locants.remove(i); @@ -253,8 +215,8 @@ * */ void clearLocants() 
{ - for (String l : locants) { - frag.removeMappingFromAtomLocantMap(l); + for (int i = 0, l = locants.size(); i < l; i++) { + frag.removeMappingFromAtomLocantMap(locants.get(i)); } locants.clear(); } @@ -263,10 +225,10 @@ * Removes only elementSymbolLocants: e.g. N, S', Se */ void removeElementSymbolLocants() { - for (int i = locants.size()-1; i >=0; i--) { - String l =locants.get(i); - if (MATCH_ELEMENT_SYMBOL_LOCANT.matcher(l).matches()){ - frag.removeMappingFromAtomLocantMap(l); + for (int i = locants.size() - 1; i >= 0; i--) { + String locant = locants.get(i); + if (MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locant).matches()){ + frag.removeMappingFromAtomLocantMap(locant); locants.remove(i); } } @@ -277,10 +239,10 @@ * Hence removes numeric locants and greek locants */ void removeLocantsOtherThanElementSymbolLocants() { - for (int i = locants.size()-1; i >=0; i--) { - String l =locants.get(i); - if (!MATCH_ELEMENT_SYMBOL_LOCANT.matcher(l).matches()){ - frag.removeMappingFromAtomLocantMap(l); + for (int i = locants.size() - 1; i >= 0; i--) { + String locant = locants.get(i); + if (!MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locant).matches()){ + frag.removeMappingFromAtomLocantMap(locant); locants.remove(i); } } @@ -292,17 +254,16 @@ * @return true if it has, false if not */ boolean hasLocant(String locant) { - for(String l : locants) { - if(l.equals(locant)) - return true; + if (locants.contains(locant)) { + return true; } Matcher m = MATCH_AMINOACID_STYLE_LOCANT.matcher(locant); if (m.matches()){//e.g. N'5 - if (element.equals(m.group(1))){//element symbol + if (chemEl.toString().equals(m.group(1))){//element symbol if (!m.group(2).equals("") && (!hasLocant(m.group(1) +m.group(2)))){//has primes return false;//must have exact locant e.g. 
N' } - if (OpsinTools.depthFirstSearchForNonSuffixAtomWithLocant(this, m.group(3))!=null){ + if (OpsinTools.depthFirstSearchForNonSuffixAtomWithLocant(this, m.group(3)) != null){ return true; } } @@ -317,9 +278,7 @@ * @return The locant, or null if there is no locant */ String getFirstLocant() { - if(locants.size() == 0) - return null; - return locants.get(0); + return locants.size() > 0 ? locants.get(0) : null; } /**Returns the array of locants containing all locants associated with the atom @@ -335,10 +294,11 @@ * @return The list of locants (may be empty) */ List getElementSymbolLocants() { - List elementSymbolLocants =new ArrayList(); - for (String l : locants) { - if (MATCH_ELEMENT_SYMBOL_LOCANT.matcher(l).matches()) { - elementSymbolLocants.add(l); + List elementSymbolLocants = new ArrayList(1); + for (int i = 0, l = locants.size(); i < l; i++) { + String locant = locants.get(i); + if (MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locant).matches()) { + elementSymbolLocants.add(locant); } } return elementSymbolLocants; @@ -357,23 +317,23 @@ * @return The ID of the atom */ int getID() { - return ID; + return id; } - /**Gets the atomic symbol corresponding to the element of the atom. + /**Gets the chemical element corresponding to the element of the atom. * - * @return The atomic symbol corresponding to the element of the atom + * @return The chemical element corresponding to the element of the atom */ - String getElement() { - return element; + ChemEl getElement() { + return chemEl; } - /**Sets the atomic symbol corresponding to the element of the atom. + /**Sets the chemical element corresponding to the element of the atom. * - * @param elem The atomic symbol corresponding to the element of the atom + * @param chemEl The chemical element corresponding to the element of the atom */ - void setElement(String elem) { - element = elem; + void setElement(ChemEl chemEl) { + this.chemEl = chemEl; } /**Gets the formal charge on the atom. 
@@ -411,7 +371,6 @@ charge = 0; protonsExplicitlyAddedOrRemoved = 0; } - /** * Gets the mass number of the atom or null if not explicitly defined @@ -427,6 +386,9 @@ * @param isotope */ void setIsotope(Integer isotope) { + if (isotope != null && isotope < chemEl.ATOMIC_NUM) { + throw new RuntimeException("Isotopic mass cannot be less than the element's number of protons: " + chemEl.toString() + " " + isotope + " < " + chemEl.ATOMIC_NUM ); + } this.isotope = isotope; } @@ -435,9 +397,10 @@ * @param b The bond to be added */ void addBond(Bond b) { - if (!bonds.contains(b)){ - bonds.add(b); + if (bonds.contains(b)){ + throw new IllegalArgumentException("Atom already has given bond (This is not allowed as this would give two bonds between the same atoms!)"); } + bonds.add(b); } /**Removes a bond to the atom @@ -457,8 +420,8 @@ */ int getIncomingValency() { int v = 0; - for(Bond b : bonds) { - v += b.getOrder(); + for (int i = 0, l = bonds.size(); i < l; i++) { + v += bonds.get(i).getOrder(); } return v; } @@ -506,15 +469,20 @@ List getBonds() { return Collections.unmodifiableList(bonds); } + + int getBondCount() { + return bonds.size(); + } /**Gets a list of atoms that connect to the atom * * @return The list of atoms connected to the atom */ List getAtomNeighbours(){ - List results = new ArrayList(); - for(Bond b : bonds) { - results.add(b.getOtherAtom(this)); + int bondCount = bonds.size(); + List results = new ArrayList(bondCount); + for (int i = 0; i < bondCount; i++) { + results.add(bonds.get(i).getOtherAtom(this)); } return results; } @@ -584,30 +552,26 @@ * @throws StructureBuildingException */ void ensureSVIsConsistantWithValency(boolean takeIntoAccountExternalBonds) throws StructureBuildingException { - if (spareValency){ + if (spareValency) { Integer maxValency; - if (lambdaConventionValency!=null){ - maxValency=lambdaConventionValency + protonsExplicitlyAddedOrRemoved; + if (lambdaConventionValency != null) { + maxValency = lambdaConventionValency + 
protonsExplicitlyAddedOrRemoved; } else{ - if (element.equals("C")){ - maxValency = 4 + protonsExplicitlyAddedOrRemoved; - } - else{ - if (ValencyChecker.getHWValency(element)==null){ - throw new StructureBuildingException(element +" is not expected to be aromatic!"); - } - maxValency = ValencyChecker.getHWValency(element) + protonsExplicitlyAddedOrRemoved; + Integer hwValency = ValencyChecker.getHWValency(chemEl); + if (hwValency == null) { + throw new StructureBuildingException(chemEl + " is not expected to be aromatic!"); } + maxValency = hwValency + protonsExplicitlyAddedOrRemoved; } int maxSpareValency; - if (takeIntoAccountExternalBonds){ - maxSpareValency =maxValency-getIncomingValency() -outValency; + if (takeIntoAccountExternalBonds) { + maxSpareValency = maxValency - getIncomingValency() - outValency; } else{ - maxSpareValency =maxValency-frag.getIntraFragmentIncomingValency(this); + maxSpareValency = maxValency - frag.getIntraFragmentIncomingValency(this); } - if (maxSpareValency < 1){ + if (maxSpareValency < 1) { setSpareValency(false); } } @@ -630,7 +594,8 @@ * @return The bond, or null if there is no bond */ Bond getBondToAtom(Atom a) { - for (Bond b : bonds) { + for (int i = 0, l = bonds.size(); i < l; i++) { + Bond b = bonds.get(i); if(b.getOtherAtom(this) == a){ return b; } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AtomParity.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AtomParity.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AtomParity.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AtomParity.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,8 +1,5 @@ package uk.ac.cam.ch.wwmm.opsin; -import nu.xom.Attribute; -import nu.xom.Element; - /** * Hold information about 4 atoms and their chiral determinant allowing the description of tetrahedral stereochemistry * @author dl387 @@ -12,11 +9,11 @@ /** * A dummy hydrogen atom. 
Used to represent an implicit hydrogen that is attached to a tetrahedral stereocentre */ - static final Atom hydrogen = new Atom("H"); + static final Atom hydrogen = new Atom(ChemEl.H); /** * A dummy hydrogen atom. Used to represent the hydrogen that replaced a hydroxy at a tetrahedral stereocentre */ - static final Atom deoxyHydrogen = new Atom("H"); + static final Atom deoxyHydrogen = new Atom(ChemEl.H); private Atom[] atomRefs4; private int parity; @@ -33,25 +30,6 @@ this.parity = parity; } - /** - * Serialises this object to CML - * @return - */ - Element toCML() { - Element atomParityElement = new Element(XmlDeclarations.CML_ATOMPARITY_EL, XmlDeclarations.CML_NAMESPACE); - StringBuilder atomRefsSb = new StringBuilder(); - for(int i=0; i elementToPaulingElectronegativity = new EnumMap(ChemEl.class); + private static final Map elementToHwPriority = new EnumMap(ChemEl.class); + + static{ + elementToPaulingElectronegativity.put(ChemEl.H, 2.20); + elementToPaulingElectronegativity.put(ChemEl.Li, 0.98); + elementToPaulingElectronegativity.put(ChemEl.Be, 1.57); + elementToPaulingElectronegativity.put(ChemEl.B, 2.04); + elementToPaulingElectronegativity.put(ChemEl.C, 2.55); + elementToPaulingElectronegativity.put(ChemEl.N, 3.04); + elementToPaulingElectronegativity.put(ChemEl.O, 3.44); + elementToPaulingElectronegativity.put(ChemEl.F, 3.98); + elementToPaulingElectronegativity.put(ChemEl.Na, 0.93); + elementToPaulingElectronegativity.put(ChemEl.Mg, 1.31); + elementToPaulingElectronegativity.put(ChemEl.Al, 1.61); + elementToPaulingElectronegativity.put(ChemEl.Si, 1.90); + elementToPaulingElectronegativity.put(ChemEl.P, 2.19); + elementToPaulingElectronegativity.put(ChemEl.S, 2.58); + elementToPaulingElectronegativity.put(ChemEl.Cl, 3.16); + elementToPaulingElectronegativity.put(ChemEl.K, 0.82); + elementToPaulingElectronegativity.put(ChemEl.Ca, 1.00); + elementToPaulingElectronegativity.put(ChemEl.Sc, 1.36); + elementToPaulingElectronegativity.put(ChemEl.Ti, 1.54); + 
elementToPaulingElectronegativity.put(ChemEl.V, 1.63); + elementToPaulingElectronegativity.put(ChemEl.Cr, 1.66); + elementToPaulingElectronegativity.put(ChemEl.Mn, 1.55); + elementToPaulingElectronegativity.put(ChemEl.Fe, 1.83); + elementToPaulingElectronegativity.put(ChemEl.Co, 1.88); + elementToPaulingElectronegativity.put(ChemEl.Ni, 1.91); + elementToPaulingElectronegativity.put(ChemEl.Cu, 1.90); + elementToPaulingElectronegativity.put(ChemEl.Zn, 1.65); + elementToPaulingElectronegativity.put(ChemEl.Ga, 1.81); + elementToPaulingElectronegativity.put(ChemEl.Ge, 2.01); + elementToPaulingElectronegativity.put(ChemEl.As, 2.18); + elementToPaulingElectronegativity.put(ChemEl.Se, 2.55); + elementToPaulingElectronegativity.put(ChemEl.Br, 2.96); + elementToPaulingElectronegativity.put(ChemEl.Kr, 3.00); + elementToPaulingElectronegativity.put(ChemEl.Rb, 0.82); + elementToPaulingElectronegativity.put(ChemEl.Sr, 0.95); + elementToPaulingElectronegativity.put(ChemEl.Y, 1.22); + elementToPaulingElectronegativity.put(ChemEl.Zr, 1.33); + elementToPaulingElectronegativity.put(ChemEl.Nb, 1.6); + elementToPaulingElectronegativity.put(ChemEl.Mo, 2.16); + elementToPaulingElectronegativity.put(ChemEl.Tc, 1.9); + elementToPaulingElectronegativity.put(ChemEl.Ru, 2.2); + elementToPaulingElectronegativity.put(ChemEl.Rh, 2.28); + elementToPaulingElectronegativity.put(ChemEl.Pd, 2.20); + elementToPaulingElectronegativity.put(ChemEl.Ag, 1.93); + elementToPaulingElectronegativity.put(ChemEl.Cd, 1.69); + elementToPaulingElectronegativity.put(ChemEl.In, 1.78); + elementToPaulingElectronegativity.put(ChemEl.Sn, 1.96); + elementToPaulingElectronegativity.put(ChemEl.Sb, 2.05); + elementToPaulingElectronegativity.put(ChemEl.Te, 2.1); + elementToPaulingElectronegativity.put(ChemEl.I, 2.66); + elementToPaulingElectronegativity.put(ChemEl.Xe, 2.60); + elementToPaulingElectronegativity.put(ChemEl.Cs, 0.79); + elementToPaulingElectronegativity.put(ChemEl.Ba, 0.89); + 
elementToPaulingElectronegativity.put(ChemEl.La, 1.1); + elementToPaulingElectronegativity.put(ChemEl.Ce, 1.12); + elementToPaulingElectronegativity.put(ChemEl.Pr, 1.13); + elementToPaulingElectronegativity.put(ChemEl.Nd, 1.14); + elementToPaulingElectronegativity.put(ChemEl.Pm, 1.13); + elementToPaulingElectronegativity.put(ChemEl.Sm, 1.17); + elementToPaulingElectronegativity.put(ChemEl.Eu, 1.2); + elementToPaulingElectronegativity.put(ChemEl.Gd, 1.2); + elementToPaulingElectronegativity.put(ChemEl.Tb, 1.1); + elementToPaulingElectronegativity.put(ChemEl.Dy, 1.22); + elementToPaulingElectronegativity.put(ChemEl.Ho, 1.23); + elementToPaulingElectronegativity.put(ChemEl.Er, 1.24); + elementToPaulingElectronegativity.put(ChemEl.Tm, 1.25); + elementToPaulingElectronegativity.put(ChemEl.Yb, 1.1); + elementToPaulingElectronegativity.put(ChemEl.Lu, 1.27); + elementToPaulingElectronegativity.put(ChemEl.Hf, 1.3); + elementToPaulingElectronegativity.put(ChemEl.Ta, 1.5); + elementToPaulingElectronegativity.put(ChemEl.W, 2.36); + elementToPaulingElectronegativity.put(ChemEl.Re, 1.9); + elementToPaulingElectronegativity.put(ChemEl.Os, 2.2); + elementToPaulingElectronegativity.put(ChemEl.Ir, 2.20); + elementToPaulingElectronegativity.put(ChemEl.Pt, 2.28); + elementToPaulingElectronegativity.put(ChemEl.Au, 2.54); + elementToPaulingElectronegativity.put(ChemEl.Hg, 2.00); + elementToPaulingElectronegativity.put(ChemEl.Tl, 1.62); + elementToPaulingElectronegativity.put(ChemEl.Pb, 2.33); + elementToPaulingElectronegativity.put(ChemEl.Bi, 2.02); + elementToPaulingElectronegativity.put(ChemEl.Po, 2.0); + elementToPaulingElectronegativity.put(ChemEl.At, 2.2); + elementToPaulingElectronegativity.put(ChemEl.Rn, 2.2); + elementToPaulingElectronegativity.put(ChemEl.Fr, 0.7); + elementToPaulingElectronegativity.put(ChemEl.Ra, 0.9); + elementToPaulingElectronegativity.put(ChemEl.Ac, 1.1); + elementToPaulingElectronegativity.put(ChemEl.Th, 1.3); + 
elementToPaulingElectronegativity.put(ChemEl.Pa, 1.5); + elementToPaulingElectronegativity.put(ChemEl.U, 1.38); + elementToPaulingElectronegativity.put(ChemEl.Np, 1.36); + elementToPaulingElectronegativity.put(ChemEl.Pu, 1.28); + elementToPaulingElectronegativity.put(ChemEl.Am, 1.13); + elementToPaulingElectronegativity.put(ChemEl.Cm, 1.28); + elementToPaulingElectronegativity.put(ChemEl.Bk, 1.3); + elementToPaulingElectronegativity.put(ChemEl.Cf, 1.3); + elementToPaulingElectronegativity.put(ChemEl.Es, 1.3); + elementToPaulingElectronegativity.put(ChemEl.Fm, 1.3); + elementToPaulingElectronegativity.put(ChemEl.Md, 1.3); + elementToPaulingElectronegativity.put(ChemEl.No, 1.3); + elementToPaulingElectronegativity.put(ChemEl.Lr, 1.3); + + elementToHwPriority.put(ChemEl.F, 23); + elementToHwPriority.put(ChemEl.Cl, 22); + elementToHwPriority.put(ChemEl.Br, 21); + elementToHwPriority.put(ChemEl.I, 20); + elementToHwPriority.put(ChemEl.O, 19); + elementToHwPriority.put(ChemEl.S, 18); + elementToHwPriority.put(ChemEl.Se, 17); + elementToHwPriority.put(ChemEl.Te, 16); + elementToHwPriority.put(ChemEl.N, 15); + elementToHwPriority.put(ChemEl.P, 14); + elementToHwPriority.put(ChemEl.As, 13); + elementToHwPriority.put(ChemEl.Sb, 12); + elementToHwPriority.put(ChemEl.Bi, 11); + elementToHwPriority.put(ChemEl.Si, 10); + elementToHwPriority.put(ChemEl.Ge, 9); + elementToHwPriority.put(ChemEl.Sn, 8); + elementToHwPriority.put(ChemEl.Pb, 7); + elementToHwPriority.put(ChemEl.B, 6); + elementToHwPriority.put(ChemEl.Al, 5); + elementToHwPriority.put(ChemEl.Ga, 4); + elementToHwPriority.put(ChemEl.In, 3); + elementToHwPriority.put(ChemEl.Tl, 2); + elementToHwPriority.put(ChemEl.Hg, 1); + } + /** * Useful to give an indication of whether a bond is like to be ionic (diff >1.8), polar or covalent (diff < 1.2) + * @param chemEl + * @return */ - static final Map elementToPaulingElectronegativity = new HashMap(); - - static final Map elementToAtomicNumber = new HashMap(); - + static Double 
getPaulingElectronegativity(ChemEl chemEl) { + return elementToPaulingElectronegativity.get(chemEl); + } + /** - * Maps element symbol to the priority of that atom in Hantzch-Widman system. A higher value indicates a higher priority. + * Maps chemEl to the priority of that atom in Hantzch-Widman system. A higher value indicates a higher priority. + * @param chemEl + * @return */ - static final Map elementToHwPriority = new HashMap(); - - static{ - elementToPaulingElectronegativity.put("H", 2.20); - elementToPaulingElectronegativity.put("Li", 0.98); - elementToPaulingElectronegativity.put("Be", 1.57); - elementToPaulingElectronegativity.put("B", 2.04); - elementToPaulingElectronegativity.put("C", 2.55); - elementToPaulingElectronegativity.put("N", 3.04); - elementToPaulingElectronegativity.put("O", 3.44); - elementToPaulingElectronegativity.put("F", 3.98); - elementToPaulingElectronegativity.put("Na", 0.93); - elementToPaulingElectronegativity.put("Mg", 1.31); - elementToPaulingElectronegativity.put("Al", 1.61); - elementToPaulingElectronegativity.put("Si", 1.90); - elementToPaulingElectronegativity.put("P", 2.19); - elementToPaulingElectronegativity.put("S", 2.58); - elementToPaulingElectronegativity.put("Cl", 3.16); - elementToPaulingElectronegativity.put("K", 0.82); - elementToPaulingElectronegativity.put("Ca", 1.00); - elementToPaulingElectronegativity.put("Sc", 1.36); - elementToPaulingElectronegativity.put("Ti", 1.54); - elementToPaulingElectronegativity.put("V", 1.63); - elementToPaulingElectronegativity.put("Cr", 1.66); - elementToPaulingElectronegativity.put("Mn", 1.55); - elementToPaulingElectronegativity.put("Fe", 1.83); - elementToPaulingElectronegativity.put("Co", 1.88); - elementToPaulingElectronegativity.put("Ni", 1.91); - elementToPaulingElectronegativity.put("Cu", 1.90); - elementToPaulingElectronegativity.put("Zn", 1.65); - elementToPaulingElectronegativity.put("Ga", 1.81); - elementToPaulingElectronegativity.put("Ge", 2.01); - 
elementToPaulingElectronegativity.put("As", 2.18); - elementToPaulingElectronegativity.put("Se", 2.55); - elementToPaulingElectronegativity.put("Br", 2.96); - elementToPaulingElectronegativity.put("Kr", 3.00); - elementToPaulingElectronegativity.put("Rb", 0.82); - elementToPaulingElectronegativity.put("Sr", 0.95); - elementToPaulingElectronegativity.put("Y", 1.22); - elementToPaulingElectronegativity.put("Zr", 1.33); - elementToPaulingElectronegativity.put("Nb", 1.6); - elementToPaulingElectronegativity.put("Mo", 2.16); - elementToPaulingElectronegativity.put("Tc", 1.9); - elementToPaulingElectronegativity.put("Ru", 2.2); - elementToPaulingElectronegativity.put("Rh", 2.28); - elementToPaulingElectronegativity.put("Pd", 2.20); - elementToPaulingElectronegativity.put("Ag", 1.93); - elementToPaulingElectronegativity.put("Cd", 1.69); - elementToPaulingElectronegativity.put("In", 1.78); - elementToPaulingElectronegativity.put("Sn", 1.96); - elementToPaulingElectronegativity.put("Sb", 2.05); - elementToPaulingElectronegativity.put("Te", 2.1); - elementToPaulingElectronegativity.put("I", 2.66); - elementToPaulingElectronegativity.put("Xe", 2.60); - elementToPaulingElectronegativity.put("Cs", 0.79); - elementToPaulingElectronegativity.put("Ba", 0.89); - elementToPaulingElectronegativity.put("La", 1.1); - elementToPaulingElectronegativity.put("Ce", 1.12); - elementToPaulingElectronegativity.put("Pr", 1.13); - elementToPaulingElectronegativity.put("Nd", 1.14); - elementToPaulingElectronegativity.put("Pm", 1.13); - elementToPaulingElectronegativity.put("Sm", 1.17); - elementToPaulingElectronegativity.put("Eu", 1.2); - elementToPaulingElectronegativity.put("Gd", 1.2); - elementToPaulingElectronegativity.put("Tb", 1.1); - elementToPaulingElectronegativity.put("Dy", 1.22); - elementToPaulingElectronegativity.put("Ho", 1.23); - elementToPaulingElectronegativity.put("Er", 1.24); - elementToPaulingElectronegativity.put("Tm", 1.25); - elementToPaulingElectronegativity.put("Yb", 
1.1); - elementToPaulingElectronegativity.put("Lu", 1.27); - elementToPaulingElectronegativity.put("Hf", 1.3); - elementToPaulingElectronegativity.put("Ta", 1.5); - elementToPaulingElectronegativity.put("W", 2.36); - elementToPaulingElectronegativity.put("Re", 1.9); - elementToPaulingElectronegativity.put("Os", 2.2); - elementToPaulingElectronegativity.put("Ir", 2.20); - elementToPaulingElectronegativity.put("Pt", 2.28); - elementToPaulingElectronegativity.put("Au", 2.54); - elementToPaulingElectronegativity.put("Hg", 2.00); - elementToPaulingElectronegativity.put("Tl", 1.62); - elementToPaulingElectronegativity.put("Pb", 2.33); - elementToPaulingElectronegativity.put("Bi", 2.02); - elementToPaulingElectronegativity.put("Po", 2.0); - elementToPaulingElectronegativity.put("At", 2.2); - elementToPaulingElectronegativity.put("Rn", 2.2); - elementToPaulingElectronegativity.put("Fr", 0.7); - elementToPaulingElectronegativity.put("Ra", 0.9); - elementToPaulingElectronegativity.put("Ac", 1.1); - elementToPaulingElectronegativity.put("Th", 1.3); - elementToPaulingElectronegativity.put("Pa", 1.5); - elementToPaulingElectronegativity.put("U", 1.38); - elementToPaulingElectronegativity.put("Np", 1.36); - elementToPaulingElectronegativity.put("Pu", 1.28); - elementToPaulingElectronegativity.put("Am", 1.13); - elementToPaulingElectronegativity.put("Cm", 1.28); - elementToPaulingElectronegativity.put("Bk", 1.3); - elementToPaulingElectronegativity.put("Cf", 1.3); - elementToPaulingElectronegativity.put("Es", 1.3); - elementToPaulingElectronegativity.put("Fm", 1.3); - elementToPaulingElectronegativity.put("Md", 1.3); - elementToPaulingElectronegativity.put("No", 1.3); - elementToPaulingElectronegativity.put("Lr", 1.3); - - elementToAtomicNumber.put("H", 1); - elementToAtomicNumber.put("He", 2); - elementToAtomicNumber.put("Li", 3); - elementToAtomicNumber.put("Be", 4); - elementToAtomicNumber.put("B", 5); - elementToAtomicNumber.put("C", 6); - elementToAtomicNumber.put("N", 7); - 
elementToAtomicNumber.put("O", 8); - elementToAtomicNumber.put("F", 9); - elementToAtomicNumber.put("Ne", 10); - elementToAtomicNumber.put("Na", 11); - elementToAtomicNumber.put("Mg", 12); - elementToAtomicNumber.put("Al", 13); - elementToAtomicNumber.put("Si", 14); - elementToAtomicNumber.put("P", 15); - elementToAtomicNumber.put("S", 16); - elementToAtomicNumber.put("Cl", 17); - elementToAtomicNumber.put("Ar", 18); - elementToAtomicNumber.put("K", 19); - elementToAtomicNumber.put("Ca", 20); - elementToAtomicNumber.put("Sc", 21); - elementToAtomicNumber.put("Ti", 22); - elementToAtomicNumber.put("V", 23); - elementToAtomicNumber.put("Cr", 24); - elementToAtomicNumber.put("Mn", 25); - elementToAtomicNumber.put("Fe", 26); - elementToAtomicNumber.put("Co", 27); - elementToAtomicNumber.put("Ni", 28); - elementToAtomicNumber.put("Cu", 29); - elementToAtomicNumber.put("Zn", 30); - elementToAtomicNumber.put("Ga", 31); - elementToAtomicNumber.put("Ge", 32); - elementToAtomicNumber.put("As", 33); - elementToAtomicNumber.put("Se", 34); - elementToAtomicNumber.put("Br", 35); - elementToAtomicNumber.put("Kr", 36); - elementToAtomicNumber.put("Rb", 37); - elementToAtomicNumber.put("Sr", 38); - elementToAtomicNumber.put("Y", 39); - elementToAtomicNumber.put("Zr", 40); - elementToAtomicNumber.put("Nb", 41); - elementToAtomicNumber.put("Mo", 42); - elementToAtomicNumber.put("Tc", 43); - elementToAtomicNumber.put("Ru", 44); - elementToAtomicNumber.put("Rh", 45); - elementToAtomicNumber.put("Pd", 46); - elementToAtomicNumber.put("Ag", 47); - elementToAtomicNumber.put("Cd", 48); - elementToAtomicNumber.put("In", 49); - elementToAtomicNumber.put("Sn", 50); - elementToAtomicNumber.put("Sb", 51); - elementToAtomicNumber.put("Te", 52); - elementToAtomicNumber.put("I", 53); - elementToAtomicNumber.put("Xe", 54); - elementToAtomicNumber.put("Cs", 55); - elementToAtomicNumber.put("Ba", 56); - elementToAtomicNumber.put("La", 57); - elementToAtomicNumber.put("Ce", 58); - 
elementToAtomicNumber.put("Pr", 59); - elementToAtomicNumber.put("Nd", 60); - elementToAtomicNumber.put("Pm", 61); - elementToAtomicNumber.put("Sm", 62); - elementToAtomicNumber.put("Eu", 63); - elementToAtomicNumber.put("Gd", 64); - elementToAtomicNumber.put("Tb", 65); - elementToAtomicNumber.put("Dy", 66); - elementToAtomicNumber.put("Ho", 67); - elementToAtomicNumber.put("Er", 68); - elementToAtomicNumber.put("Tm", 69); - elementToAtomicNumber.put("Yb", 70); - elementToAtomicNumber.put("Lu", 71); - elementToAtomicNumber.put("Hf", 72); - elementToAtomicNumber.put("Ta", 73); - elementToAtomicNumber.put("W", 74); - elementToAtomicNumber.put("Re", 75); - elementToAtomicNumber.put("Os", 76); - elementToAtomicNumber.put("Ir", 77); - elementToAtomicNumber.put("Pt", 78); - elementToAtomicNumber.put("Au", 79); - elementToAtomicNumber.put("Hg", 80); - elementToAtomicNumber.put("Tl", 81); - elementToAtomicNumber.put("Pb", 82); - elementToAtomicNumber.put("Bi", 83); - elementToAtomicNumber.put("Po", 84); - elementToAtomicNumber.put("At", 85); - elementToAtomicNumber.put("Rn", 86); - elementToAtomicNumber.put("Fr", 87); - elementToAtomicNumber.put("Ra", 88); - elementToAtomicNumber.put("Ac", 89); - elementToAtomicNumber.put("Th", 90); - elementToAtomicNumber.put("Pa", 91); - elementToAtomicNumber.put("U", 92); - elementToAtomicNumber.put("Np", 93); - elementToAtomicNumber.put("Pu", 94); - elementToAtomicNumber.put("Am", 95); - elementToAtomicNumber.put("Cm", 96); - elementToAtomicNumber.put("Bk", 97); - elementToAtomicNumber.put("Cf", 98); - elementToAtomicNumber.put("Es", 99); - elementToAtomicNumber.put("Fm", 100); - elementToAtomicNumber.put("Md", 101); - elementToAtomicNumber.put("No", 102); - elementToAtomicNumber.put("Lr", 103); - elementToAtomicNumber.put("Rf", 104); - elementToAtomicNumber.put("Db", 105); - elementToAtomicNumber.put("Sg", 106); - elementToAtomicNumber.put("Bh", 107); - elementToAtomicNumber.put("Hs", 108); - elementToAtomicNumber.put("Mt", 109); - 
elementToAtomicNumber.put("Ds", 110); - - elementToHwPriority.put("F", 23); - elementToHwPriority.put("Cl", 22); - elementToHwPriority.put("Br", 21); - elementToHwPriority.put("I", 20); - elementToHwPriority.put("O", 19); - elementToHwPriority.put("S", 18); - elementToHwPriority.put("Se", 17); - elementToHwPriority.put("Te", 16); - elementToHwPriority.put("N", 15); - elementToHwPriority.put("P", 14); - elementToHwPriority.put("As", 13); - elementToHwPriority.put("Sb", 12); - elementToHwPriority.put("Bi", 11); - elementToHwPriority.put("Si", 10); - elementToHwPriority.put("Ge", 9); - elementToHwPriority.put("Sn", 8); - elementToHwPriority.put("Pb", 7); - elementToHwPriority.put("B", 6); - elementToHwPriority.put("Al", 5); - elementToHwPriority.put("Ga", 4); - elementToHwPriority.put("In", 3); - elementToHwPriority.put("Tl", 2); - elementToHwPriority.put("Hg", 1); + static Integer getHwpriority(ChemEl chemEl) { + return elementToHwPriority.get(chemEl); } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Attribute.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Attribute.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Attribute.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Attribute.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,74 @@ +package uk.ac.cam.ch.wwmm.opsin; + +class Attribute { + + private final String name; + private String value; + + Attribute(String name, String value) { + this.name = name; + this.value = value; + } + + /** + * Creates a copy + * @param attribute + */ + Attribute(Attribute attribute) { + this.name = attribute.getName(); + this.value = attribute.getValue(); + } + + String getValue() { + return value; + } + + String getName() { + return name; + } + + void setValue(String value) { + this.value = value; + } + + String toXML() { + return getName() + "=\"" + escapeText(value) + "\""; + } + + public String toString() { 
+ return name +"\t" + value; + } + + private String escapeText(String s) { + StringBuilder result = new StringBuilder(); + for (int i = 0, l = s.length(); i < l; i++) { + char c = s.charAt(i); + switch (c) { + case '\t': + result.append(" "); + break; + case '\n': + result.append(" "); + break; + case '\r': + result.append(" "); + break; + case '"': + result.append("""); + break; + case '&': + result.append("&"); + break; + case '<': + result.append("<"); + break; + case '>': + result.append(">"); + break; + default: + result.append(c); + } + } + return result.toString(); + } +} \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AutomatonInitialiser.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AutomatonInitialiser.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AutomatonInitialiser.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/AutomatonInitialiser.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,5 +1,6 @@ package uk.ac.cam.ch.wwmm.opsin; +import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -22,7 +23,11 @@ class AutomatonInitialiser { private static final Logger LOG = Logger.getLogger(AutomatonInitialiser.class); - private static final ResourceGetter resourceGetter = new ResourceGetter("uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/"); + private final ResourceGetter resourceGetter; + + AutomatonInitialiser(String resourcePath) { + resourceGetter = new ResourceGetter(resourcePath); + } /** * In preference serialised automata and their hashes will be looked for in the resource folder in your working directory @@ -36,7 +41,7 @@ * @param tableize: if true, a transition table is created which makes the run method faster in return of a higher memory usage (adds ~256kb) * @return A RunAutomaton, may have been built from scratch or loaded from a file 
*/ - static RunAutomaton loadAutomaton(String automatonName, String regex, boolean tableize, boolean reverseAutomaton) { + RunAutomaton loadAutomaton(String automatonName, String regex, boolean tableize, boolean reverseAutomaton) { if (reverseAutomaton){ automatonName+="_reversed_"; } @@ -53,25 +58,25 @@ return automaton; } - private static boolean isAutomatonCached(String automatonName, String regex) { + private boolean isAutomatonCached(String automatonName, String regex) { String currentRegexHash = getRegexHash(regex); String cachedRegexHash = getCachedRegexHash(automatonName); return currentRegexHash.equals(cachedRegexHash); } - private static String getRegexHash(String regex) { + private String getRegexHash(String regex) { return Integer.toString(regex.hashCode()); } - private static String getCachedRegexHash(String automatonName) { + private String getCachedRegexHash(String automatonName) { /*This file contains the hashcode of the regex which was used to generate the automaton on the disk */ return resourceGetter.getFileContentsAsString(automatonName + "RegexHash.txt"); } - private static RunAutomaton loadCachedAutomaton(String automatonName) throws IOException{ + private RunAutomaton loadCachedAutomaton(String automatonName) throws IOException{ InputStream automatonInput = resourceGetter.getInputstreamFromFileName(automatonName +"SerialisedAutomaton.aut"); try { - return RunAutomaton.load(automatonInput); + return RunAutomaton.load(new BufferedInputStream(automatonInput)); } catch (Exception e) { IOException ioe = new IOException("Error loading automaton"); ioe.initCause(e); @@ -86,11 +91,10 @@ if (reverseAutomaton){ SpecialOperations.reverse(a); } - RunAutomaton ra = new RunAutomaton(a, tableize); - return ra; + return new RunAutomaton(a, tableize); } - private static void cacheAutomaton(String automatonName, RunAutomaton automaton, String regex) { + private void cacheAutomaton(String automatonName, RunAutomaton automaton, String regex) { OutputStream 
regexHashOutputStream = null; OutputStream automatonOutputStream = null; try { diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Bond.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Bond.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Bond.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Bond.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,13 +1,7 @@ package uk.ac.cam.ch.wwmm.opsin; -import java.util.ArrayList; -import java.util.List; - import uk.ac.cam.ch.wwmm.opsin.BondStereo.BondStereoValue; -import nu.xom.Attribute; -import nu.xom.Element; - /**A bond, between two atoms. * * @author ptc24 @@ -34,12 +28,7 @@ * Holds the bondStereo object associated with this bond * null by default */ - private BondStereo bondStereo= null; - - /** - * If the bond is a fusion bond this will in the fused ring numberer be populated with the rings that it connects - */ - private final List fusedRings = new ArrayList(2); + private BondStereo bondStereo = null; /** DO NOT CALL DIRECTLY EXCEPT FOR TESTING * Creates a new Bond. @@ -49,49 +38,21 @@ * @param order The bond order. */ Bond(Atom from, Atom to, int order) { - this.from = from; - this.to = to; - this.order = order; - } - - List getFusedRings() { - return fusedRings; - } - - void addFusedRing(Ring ring) { - if (fusedRings.size()<2) { - fusedRings.add(ring); - } - } - - - /**Produces a nu.xom.Element corresponding to a CML bond tag. - * Has attributes of atomRefs2 and order. - * - * @return The CML element. 
- */ - Element toCMLBond() { - Element elem = new Element("bond", XmlDeclarations.CML_NAMESPACE); - elem.addAttribute(new Attribute("id", "a" + Integer.toString(from.getID()) - + "_a" + Integer.toString(to.getID()))); - elem.addAttribute(new Attribute("atomRefs2", "a" + Integer.toString(from.getID()) - + " a" + Integer.toString(to.getID()))); - if (order==1){ - elem.addAttribute(new Attribute("order", "S")); - } - else if (order==2){ - elem.addAttribute(new Attribute("order", "D")); + if (from == to){ + throw new IllegalArgumentException("Bonds must be made between different atoms"); } - else if (order==3){ - elem.addAttribute(new Attribute("order", "T")); + if (order < 1 || order > 3){ + throw new IllegalArgumentException("Bond order must be 1, 2 or 3"); } - else{ - elem.addAttribute(new Attribute("order", "unknown")); + if (from == null){ + throw new IllegalArgumentException("From atom was null!"); } - if (bondStereo!=null){ - elem.appendChild(bondStereo.toCML()); + if (to == null){ + throw new IllegalArgumentException("To atom was null!"); } - return elem; + this.from = from; + this.to = to; + this.order = order; } /** @@ -176,14 +137,48 @@ * @return */ Atom getOtherAtom(Atom atom) { - if (from ==atom){ + if (from == atom){ return to; } - else if (to ==atom){ + else if (to == atom){ return from; } else{ return null; } } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + from.getID(); + result = prime * result + to.getID(); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + Bond other = (Bond) obj; + + if (from == other.from && + to == other.to){ + return true; + } + if (from == other.to && + to == other.from){ + return true; + } + + return false; + } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BondStereo.java 
opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BondStereo.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BondStereo.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BondStereo.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,8 +1,5 @@ package uk.ac.cam.ch.wwmm.opsin; -import nu.xom.Attribute; -import nu.xom.Element; - /** * Holds information about the positions of 2 atoms relative to a double bond allowing the specification of cis/trans stereochemistry * @author dl387 @@ -31,8 +28,7 @@ return value; } } - - + /** * Create a bondStereo from an array of 4 atoms. The 2nd and 3rd atoms of this array are connected via a double bond. * The 1st and 4th atoms are at either end of this bond and indication is given as to whether they are cis or trans to each other. @@ -46,26 +42,6 @@ this.atomRefs4 = atomRefs4; this.bondStereoValue = cOrT; } - - /** - * Serialises this object to CML - * @return - */ - Element toCML() { - Element bondStereoElement = new Element(XmlDeclarations.CML_BONDSTEREO_EL, XmlDeclarations.CML_NAMESPACE); - StringBuilder atomRefsSb = new StringBuilder(); - for(int i=0; i outAtoms; + private final List outAtoms = new ArrayList(); /**The atoms that may be used to from things like esters*/ - private final LinkedList functionalAtoms; + private final List functionalAtoms = new ArrayList(); /**A list of fragments that have been evaluated to form this BuildResults. They are in the order they would be found in the XML*/ - private final LinkedHashSet fragments; + private final Set fragments = new LinkedHashSet(); /**A BuildResults is constructed from a list of Fragments. * This constructor creates this list from the groups present in an XML word/bracket/sub element. 
- * @param state * @param wordSubOrBracket*/ - BuildResults(BuildState state, Element wordSubOrBracket) { - outAtoms = new LinkedList(); - functionalAtoms = new LinkedList(); - fragments = new LinkedHashSet(); - List groups = XOMTools.getDescendantElementsWithTagName(wordSubOrBracket, XmlDeclarations.GROUP_EL); + BuildResults(Element wordSubOrBracket) { + List groups = OpsinTools.getDescendantElementsWithTagName(wordSubOrBracket, XmlDeclarations.GROUP_EL); for (Element group : groups) { - Fragment frag = state.xmlFragmentMap.get(group); + Fragment frag = group.getFrag(); fragments.add(frag); for (int i = 0, l = frag.getOutAtomCount(); i < l; i++) { outAtoms.add(frag.getOutAtom(i)); } int functionalAtomCount = frag.getFunctionalAtomCount(); if (functionalAtomCount > 0){ - Element parent = (Element) group.getParent(); - if (parent.getLocalName().equals(XmlDeclarations.ROOT_EL) || - OpsinTools.getNextGroup(group) == null){ - for (int i = 0, l = functionalAtomCount; i < l; i++) { + Element parent = group.getParent(); + if (parent.getName().equals(XmlDeclarations.ROOT_EL) || + OpsinTools.getNextGroup(group) == null) { + for (int i = 0; i < functionalAtomCount; i++) { functionalAtoms.add(frag.getFunctionalAtom(i)); } } } - } } /** * Construct a blank buildResults */ - BuildResults() { - outAtoms = new LinkedList(); - functionalAtoms = new LinkedList(); - fragments = new LinkedHashSet(); - } + BuildResults() {} /** * Returns a read only view of the fragments in this BuildResults @@ -78,33 +67,16 @@ return fragments.size(); } - /** - * Returns the atom corresponding to position i in the outAtoms list - * If not set explicitly and atom would violate valency or break aromaticity another is looked for - * @param i index - * @return atom - * @throws StructureBuildingException - */ - Atom getOutAtomTakingIntoAccountWhetherSetExplicitly(int i) throws StructureBuildingException { - OutAtom outAtom = outAtoms.get(i); - if (outAtom.isSetExplicitly()){ - return outAtom.getAtom(); - 
} - else{ - return outAtom.getAtom().getFrag().getAtomOrNextSuitableAtomOrThrow(outAtom.getAtom(), outAtom.getValency(), false); - } - } - - OutAtom getOutAtom(int i){ + OutAtom getOutAtom(int i) { return outAtoms.get(i); } - int getOutAtomCount(){ + int getOutAtomCount() { return outAtoms.size(); } OutAtom removeOutAtom(int i) { - OutAtom outAtom =outAtoms.get(i); + OutAtom outAtom = outAtoms.get(i); outAtom.getAtom().getFrag().removeOutAtom(outAtom); return outAtoms.remove(i); } @@ -120,12 +92,12 @@ * @param i index * @return atom */ - Atom getFunctionalAtom(int i){ + Atom getFunctionalAtom(int i) { return functionalAtoms.get(i).getAtom(); } FunctionalAtom removeFunctionalAtom(int i) { - FunctionalAtom functionalAtom =functionalAtoms.get(i); + FunctionalAtom functionalAtom = functionalAtoms.get(i); functionalAtom.getAtom().getFrag().removeFunctionalAtom(functionalAtom); return functionalAtoms.remove(i); } @@ -138,7 +110,7 @@ * Returns the first OutAtom * @return OutAtom */ - OutAtom getFirstOutAtom(){ + OutAtom getFirstOutAtom() { return outAtoms.get(0); } @@ -151,7 +123,7 @@ Atom getAtomByIdOrThrow(int id) throws StructureBuildingException { for (Fragment fragment : fragments) { Atom outAtom =fragment.getAtomByID(id); - if (outAtom!=null){ + if (outAtom != null){ return outAtom; } } @@ -169,9 +141,9 @@ * @return */ int getCharge() { - int totalCharge=0; + int totalCharge = 0; for (Fragment frag : fragments) { - totalCharge+=frag.getCharge(); + totalCharge += frag.getCharge(); } return totalCharge; } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BuildState.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BuildState.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BuildState.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/BuildState.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,104 +1,43 @@ package uk.ac.cam.ch.wwmm.opsin; -import 
java.util.Collection; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; -import java.util.Map; -import java.util.Set; - -import nu.xom.Element; +import uk.ac.cam.ch.wwmm.opsin.OpsinWarning.OpsinWarningType; /** - * Used to pass the current mode, IDManager, FragmentManager and wordRule around as well as a mapping between the XML and fragments + * Used to pass the current configuration and FragmentManager around + * The currentWordRule can be mutated to keep track of what the parent wordRule is at the given time * * @author dl387 * */ class BuildState { - final IDManager idManager; final FragmentManager fragManager; - final BiDirectionalHashMap xmlFragmentMap; final HashMap> xmlSuffixMap; final NameToStructureConfig n2sConfig; + private final List warnings = new ArrayList(); WordRule currentWordRule = null; - - private String warningMessage = null; - - String getWarningMessage() { - return warningMessage; - } - void addWarningMessage(String warningMessage) { - if (warningMessage == null){ - this.warningMessage = warningMessage; - } - else{ - this.warningMessage += ("\n" + warningMessage); - } + BuildState(NameToStructureConfig n2sConfig) { + this.n2sConfig = n2sConfig; + IDManager idManager = new IDManager(); + fragManager = new FragmentManager(new SMILESFragmentBuilder(idManager), idManager); + xmlSuffixMap = new HashMap>(); } - /** - * Wrapper class for returning multiple objects - */ - final static class BiDirectionalHashMap implements Map{ - final HashMap xmlFragmentMap = new HashMap(); - final HashMap fragmentXmlMap = new HashMap(); - public void clear() { - xmlFragmentMap.clear(); - fragmentXmlMap.clear(); - } - public boolean containsKey(Object key) { - return xmlFragmentMap.containsKey(key); - } - public boolean containsValue(Object value) { - return xmlFragmentMap.containsValue(value); - } - public Set> entrySet() { - return xmlFragmentMap.entrySet(); - } - public Fragment get(Object key) { - return xmlFragmentMap.get(key); - } - 
public boolean isEmpty() { - return xmlFragmentMap.isEmpty(); - } - public Set keySet() { - return xmlFragmentMap.keySet(); - } - public Fragment put(Element key, Fragment value) { - fragmentXmlMap.put(value, key); - return xmlFragmentMap.put(key, value); - } - public void putAll(Map m) { - for (Entry e : m.entrySet()) { - fragmentXmlMap.put(e.getValue(), e.getKey()); - } - xmlFragmentMap.putAll(m); - } - public Fragment remove(Object key) { - Fragment f =xmlFragmentMap.remove(key); - fragmentXmlMap.remove(f); - return f; - } - public int size() { - return xmlFragmentMap.size(); - } - public Collection values() { - return xmlFragmentMap.values(); - } - public Element getElement(Fragment key) { - return fragmentXmlMap.get(key); - } + List getWarnings() { + return warnings; } - - BuildState(NameToStructureConfig n2sConfig, SMILESFragmentBuilder sBuilder) { - this.n2sConfig = n2sConfig; - idManager = new IDManager(); - fragManager = new FragmentManager(sBuilder, idManager); - xmlFragmentMap = new BiDirectionalHashMap(); - xmlSuffixMap = new HashMap>(); + + void addWarning(OpsinWarningType type, String message) { + warnings.add(new OpsinWarning(type, message)); + } + + void addIsAmbiguous(String message) { + warnings.add(new OpsinWarning(OpsinWarningType.APPEARS_AMBIGUOUS, message)); } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CASTools.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CASTools.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CASTools.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CASTools.java 2017-07-23 20:55:18.000000000 +0000 @@ -5,7 +5,6 @@ import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; -import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; /** * Tools for converting CAS nomenclature into IUPAC nomenclature. 
@@ -34,7 +33,7 @@ List functionalTerms = new ArrayList(); String parent = nameComponents.get(0); - String[] parentNameParts = MATCH_SPACE.split(parent); + String[] parentNameParts = parent.split(" "); if (parentNameParts.length != 1) { if (matchCasCollectiveIndex.matcher(parentNameParts[parentNameParts.length - 1]).matches()) {//CAS collective index description should be ignored StringBuilder parentSB = new StringBuilder(); @@ -42,7 +41,7 @@ parentSB.append(parentNameParts[i]); } parent = parentSB.toString(); - parentNameParts = MATCH_SPACE.split(parent); + parentNameParts = parent.split(" "); } for (int i = 1; i < parentNameParts.length; i++) { if (!matchAcid.matcher(parentNameParts[i]).matches()) { @@ -64,7 +63,7 @@ nameComponent = nameComponent.substring(m.group().length()); compoundWithcomponent = true; } - String[] components = MATCH_SPACE.split(nameComponents.get(i)); + String[] components = nameComponents.get(i).split(" "); for (String component : components) { if (compoundWithcomponent) { functionalTerms.add(component); @@ -94,8 +93,9 @@ } } - if (parseWords.size() ==1){ - if (firstWordType.equals(WordType.functionalTerm)) { + if (parseWords.size() == 1) { + switch (firstWordType) { + case functionalTerm: if (component.equalsIgnoreCase("ester")) { if (seperateWordSubstituents.size() ==0){ throw new ParsingException("ester encountered but no substituents were specified in potential CAS name!"); @@ -108,15 +108,22 @@ } else { functionalTerms.add(component); } - } else if (firstWordType.equals(WordType.substituent)) { + break; + case substituent: seperateWordSubstituents.add(component); - } else if (firstWordType.equals(WordType.full)) { + break; + case full: if (StringTools.endsWithCaseInsensitive(component, "ate") || StringTools.endsWithCaseInsensitive(component, "ite")//e.g. 
Piperazinium, 1,1-dimethyl-, 2,2,2-trifluoroacetate hydrochloride - || component.equalsIgnoreCase("hydrofluoride") || component.equalsIgnoreCase("hydrochloride") || component.equalsIgnoreCase("hydrobromide") || component.equalsIgnoreCase("hydroiodide")) { + || StringTools.endsWithCaseInsensitive(component, "ium") + || StringTools.endsWithCaseInsensitive(component, "hydrofluoride") || StringTools.endsWithCaseInsensitive(component, "hydrochloride") + || StringTools.endsWithCaseInsensitive(component, "hydrobromide") || StringTools.endsWithCaseInsensitive(component, "hydroiodide")) { functionalTerms.add(component); } else { throw new ParsingException("Unable to interpret: " + component + " (as part of a CAS index name)- A full word was encountered where a substituent or functionalTerm was expected"); } + break; + default: + throw new ParsingException("Unrecognised CAS index name form"); } } else if (parseWords.size() == 2 && firstWordType.equals(WordType.substituent)) { @@ -152,8 +159,9 @@ casName.append(prefixFunctionalTerm); casName.append(" "); } - for (String substituent : substituents) { - casName.append(substituent); + for (int i = substituents.size() - 1; i >= 0; i--) { + //stereochemistry term comes after substituent term. In older CAS names (9CI) this stereochemistry term can apply to the substituent term. 
Hence append in reverse order + casName.append(substituents.get(i)); } casName.append(parent); for (String functionalTerm : functionalTerms) { @@ -164,11 +172,10 @@ } private static Character missingCloseBracketCharIfApplicable(String component) { - char[] characters = component.toCharArray(); int bracketLevel =0; Character missingCloseBracket =null; - for (int i = 0; i < characters.length; i++) { - char character = characters[i]; + for (int i = 0, l = component.length(); i < l; i++) { + char character = component.charAt(i); if (character == '(' || character == '[' || character == '{') { bracketLevel++; if (bracketLevel ==1){ diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ChemEl.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ChemEl.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ChemEl.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ChemEl.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,134 @@ +package uk.ac.cam.ch.wwmm.opsin; + +enum ChemEl { + R(0), + + H(1), + He(2), + Li(3), + Be(4), + B(5), + C(6), + N(7), + O(8), + F(9), + Ne(10), + Na(11), + Mg(12), + Al(13), + Si(14), + P(15), + S(16), + Cl(17), + Ar(18), + K(19), + Ca(20), + Sc(21), + Ti(22), + V(23), + Cr(24), + Mn(25), + Fe(26), + Co(27), + Ni(28), + Cu(29), + Zn(30), + Ga(31), + Ge(32), + As(33), + Se(34), + Br(35), + Kr(36), + Rb(37), + Sr(38), + Y(39), + Zr(40), + Nb(41), + Mo(42), + Tc(43), + Ru(44), + Rh(45), + Pd(46), + Ag(47), + Cd(48), + In(49), + Sn(50), + Sb(51), + Te(52), + I(53), + Xe(54), + Cs(55), + Ba(56), + La(57), + Ce(58), + Pr(59), + Nd(60), + Pm(61), + Sm(62), + Eu(63), + Gd(64), + Tb(65), + Dy(66), + Ho(67), + Er(68), + Tm(69), + Yb(70), + Lu(71), + Hf(72), + Ta(73), + W(74), + Re(75), + Os(76), + Ir(77), + Pt(78), + Au(79), + Hg(80), + Tl(81), + Pb(82), + Bi(83), + Po(84), + At(85), + Rn(86), + Fr(87), + Ra(88), + Ac(89), + Th(90), + Pa(91), + U(92), + 
Np(93), + Pu(94), + Am(95), + Cm(96), + Bk(97), + Cf(98), + Es(99), + Fm(100), + Md(101), + No(102), + Lr(103), + Rf(104), + Db(105), + Sg(106), + Bh(107), + Hs(108), + Mt(109), + Ds(110), + Rg(111), + Cn(112), + Fl(114), + Lv(116); + + final int ATOMIC_NUM; + + private ChemEl(int atomicNum) { + this.ATOMIC_NUM = atomicNum; + } + + boolean isChalcogen() { + return (this == O || this == S || this == Se || this == Te); + } + + boolean isHalogen() { + return (this == F || this == Cl || this == Br || this == I); + } +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipOrderingException.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipOrderingException.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipOrderingException.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipOrderingException.java 2017-07-23 20:55:18.000000000 +0000 @@ -6,7 +6,7 @@ * @author dl387 * */ -public class CipOrderingException extends RuntimeException { +class CipOrderingException extends StereochemistryException { private static final long serialVersionUID = 1L; diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipSequenceRules.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipSequenceRules.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipSequenceRules.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CipSequenceRules.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,15 +1,16 @@ package uk.ac.cam.ch.wwmm.opsin; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; -import java.util.LinkedList; +import java.util.Deque; import java.util.List; import java.util.Queue; /** - * An implementation of Rule 1 of the CIP rules i.e. 
constitutional differences excluding isotopes - * Cases that require rules 2-5 to distinguish result in an exception + * An implementation of rules 1-2 of the CIP rules i.e. constitutional differences then isotopes if there is a tie + * Cases that require rules 3-5 to distinguish result in an exception * * Phantom atoms are not added as I believe that the results of the program will still be the same even in their absence as everything beats a phantom and comparing phantoms to phantoms achieves nothing * (higher ligancy beats lower ligancy when comparisons are performed) @@ -17,6 +18,13 @@ * */ class CipSequenceRules { + private static class CipOrderingRunTimeException extends RuntimeException { + private static final long serialVersionUID = 1L; + CipOrderingRunTimeException(String message) { + super(message); + } + } + private final Atom chiralAtom; CipSequenceRules(Atom chiralAtom) { @@ -26,10 +34,16 @@ /** * Returns the chiral atom's neighbours in CIP order from lowest priority to highest priority * @return + * @throws CipOrderingException */ - List getNeighbouringAtomsInCIPOrder() throws CipOrderingException { + List getNeighbouringAtomsInCipOrder() throws CipOrderingException { List neighbours = chiralAtom.getAtomNeighbours(); - Collections.sort(neighbours, new SortByCIPOrder(chiralAtom)); + try { + Collections.sort(neighbours, new SortByCipOrder(chiralAtom)); + } + catch (CipOrderingRunTimeException e) { + throw new CipOrderingException(e.getMessage()); + } return neighbours; } @@ -37,13 +51,19 @@ * Returns the chiral atom's neighbours, with the exception of the given atom, in CIP order from lowest priority to highest priority * @param neighbourToIgnore * @return + * @throws CipOrderingException */ - List getNeighbouringAtomsInCIPOrderIgnoringGivenNeighbour(Atom neighbourToIgnore) throws CipOrderingException { + List getNeighbouringAtomsInCipOrderIgnoringGivenNeighbour(Atom neighbourToIgnore) throws CipOrderingException { List neighbours = 
chiralAtom.getAtomNeighbours(); - if (!neighbours.remove(neighbourToIgnore)){ - throw new IllegalArgumentException("OPSIN bug: " + neighbourToIgnore.toCMLAtom().toXML() +" was not a neighbour of the given stereogenic atom"); + if (!neighbours.remove(neighbourToIgnore)) { + throw new IllegalArgumentException("OPSIN bug: Atom" + neighbourToIgnore.getID() +" was not a neighbour of the given stereogenic atom"); + } + try { + Collections.sort(neighbours, new SortByCipOrder(chiralAtom)); + } + catch (CipOrderingRunTimeException e) { + throw new CipOrderingException(e.getMessage()); } - Collections.sort(neighbours, new SortByCIPOrder(chiralAtom)); return neighbours; } @@ -53,7 +73,7 @@ * @author dl387 * */ - private static class CipState{ + private static class CipState { CipState(List nextAtoms1, List nextAtoms2) { this.nextAtoms1 = nextAtoms1; this.nextAtoms2 = nextAtoms2; @@ -67,7 +87,7 @@ * @author dl387 * */ - private static class AtomWithHistory{ + private static class AtomWithHistory { AtomWithHistory(Atom atom, List visitedAtoms, Integer indexOfOriginalFromRoot) { this.atom = atom; this.visitedAtoms = visitedAtoms; @@ -83,19 +103,19 @@ * @author dl387 * */ - private class SortByCIPOrder implements Comparator { + private class SortByCipOrder implements Comparator { private final Atom chiralAtom; - private final AtomListCIPComparator atomListCIPComparator = new AtomListCIPComparator(); - private final ListOfAtomListsCIPComparator listOfAtomListsCIPComparator = new ListOfAtomListsCIPComparator(); - private final CIPComparator cipComparator = new CIPComparator(); + private final AtomListCipComparator atomListCipComparator = new AtomListCipComparator(); + private final ListOfAtomListsCipComparator listOfAtomListsCipComparator = new ListOfAtomListsCipComparator(); + private final CipComparator cipComparator = new CipComparator(); private int rule = 0; - SortByCIPOrder(Atom chiralAtom) { + SortByCipOrder(Atom chiralAtom) { this.chiralAtom = chiralAtom; } - public int 
compare(Atom a, Atom b){ + public int compare(Atom a, Atom b) { /* * rule = 0 --> Rule 1a Higher atomic number precedes lower * rule = 1 --> Rule 1b A duplicated atom, with its predecessor node having the same label closer to the root, ranks higher than a duplicated atom, with its predecessor node having the same label farther from the root, which ranks higher than any non-duplicated atom node @@ -108,7 +128,7 @@ AtomWithHistory bWithHistory = new AtomWithHistory(b, new ArrayList(atomsVisted), null); int compare = compareByCipRules(aWithHistory, bWithHistory); - if (compare != 0){ + if (compare != 0) { return compare; } @@ -119,25 +139,25 @@ nextAtoms2.add(bWithHistory); CipState startingState = new CipState(nextAtoms1, nextAtoms2); - Queue cipStateQueue = new LinkedList(); + Deque cipStateQueue = new ArrayDeque(); cipStateQueue.add(startingState); /* Go through CIP states in a breadth-first manner: * Neighbours of the given atom/s (if multiple atoms this is because so far the two paths leading to them have been equivalent) are evaluated for both a and b * Neighbours are sorted by CIP priority - * Comparisons performed between neighbours of a and neighbours of b (will break if compare!=0) + * Comparisons performed between neighbours of a and neighbours of b (will break if compare != 0) * Degenerate neighbours grouped together * CIP state formed for each list of neighbours and added to queue in order of priority * */ - while(!cipStateQueue.isEmpty()){ - CipState currentState = cipStateQueue.remove(); + while(!cipStateQueue.isEmpty()) { + CipState currentState = cipStateQueue.removeFirst(); compare = compareAtNextLevel(currentState, cipStateQueue); - if (compare != 0){ + if (compare != 0) { return compare; } } } - throw new CipOrderingException("Failed to assign CIP stereochemistry, this indicates a bug in OPSIN or a limitation in OPSIN's implementation of the sequence rules"); + throw new CipOrderingRunTimeException("Failed to assign CIP stereochemistry, this 
indicates a bug in OPSIN or a limitation in OPSIN's implementation of the sequence rules"); } /** @@ -149,82 +169,67 @@ * @return */ private int compareAtNextLevel(CipState cipState, Queue queue) { - List>> newNeighbours1 = getNextLevelNeighbours(cipState.nextAtoms1); - List>> newNeighbours2 = getNextLevelNeighbours(cipState.nextAtoms2); + List> neighbours1 = getNextLevelNeighbours(cipState.nextAtoms1); + List> neighbours2 = getNextLevelNeighbours(cipState.nextAtoms2); - int compare = compareNeighboursByCIPpriorityRules(newNeighbours1, newNeighbours2); + int compare = compareNeighboursByCipPriorityRules(neighbours1, neighbours2); - if (compare!=0){ + if (compare != 0) { return compare; } - List>> prioritisedNeighbours1 = formListsWithSamePriority(newNeighbours1); - List>> prioritisedNeighbours2 = formListsWithSamePriority(newNeighbours2); + List> prioritisedNeighbours1 = formListsWithSamePriority(neighbours1); + List> prioritisedNeighbours2 = formListsWithSamePriority(neighbours2); - for (int i = 1; i <= prioritisedNeighbours1.size(); i++) { - List> nextNeighbourLists1 = prioritisedNeighbours1.get(prioritisedNeighbours1.size() -i); - List> nextNeighbourLists2 = prioritisedNeighbours2.get(prioritisedNeighbours2.size() -i); - for (int j = 1; j <= nextNeighbourLists1.size(); j++) { - List nextNeighbours1 = nextNeighbourLists1.get(nextNeighbourLists1.size() -j); - List nextNeighbours2 = nextNeighbourLists2.get(nextNeighbourLists2.size() -j); - CipState newCIPstate = new CipState(nextNeighbours1, nextNeighbours2); - queue.add(newCIPstate); - } + //As earlier compare was 0, prioritisedNeighbours1.size() == prioritisedNeighbours2.size() + for (int i = prioritisedNeighbours1.size() - 1; i >= 0; i--) { + queue.add(new CipState(prioritisedNeighbours1.get(i), prioritisedNeighbours2.get(i))); } return 0; } - private int compareNeighboursByCIPpriorityRules(List>> neighbours1, List>> neighbours2){ - int neighbours1Size = neighbours1.size(); - int neighbours2Size = 
neighbours2.size(); - int differenceInSize = neighbours1Size - neighbours2Size; - int maxCommonSize = neighbours1Size > neighbours2Size ? neighbours2Size : neighbours1Size; - for (int i = 1; i <= maxCommonSize; i++) { - int difference = listOfAtomListsCIPComparator.compare(neighbours1.get(neighbours1Size -i), neighbours2.get(neighbours2Size -i)); - if (difference >0){ - return 1; - } - if (difference < 0){ - return -1; - } + private int compareNeighboursByCipPriorityRules(List> neighbours1, List> neighbours2) { + int difference = listOfAtomListsCipComparator.compare(neighbours1, neighbours2); + if (difference >0) { + return 1; + } + if (difference < 0) { + return -1; } - if (differenceInSize >0){ - return 1; - } - if (differenceInSize <0){ - return -1; - } return 0; } - - private List>> getNextLevelNeighbours(List nextAtoms) { - List>> neighbours = getNextAtomsWithAppropriateGhostAtoms(nextAtoms); - for (List> list : neighbours) { - Collections.sort(list, atomListCIPComparator); + + private List> getNextLevelNeighbours(List nextAtoms) { + List> neighbourLists = new ArrayList>(); + for (AtomWithHistory nextAtom : nextAtoms) { + neighbourLists.add(getNextAtomsWithAppropriateGhostAtoms(nextAtom)); } - Collections.sort(neighbours, listOfAtomListsCIPComparator); - return neighbours; + Collections.sort(neighbourLists, atomListCipComparator); + return neighbourLists; } - - + /** * If given say [H,C,C] this becomes [H] [C,C] * If given say [H,C,C] [H,C,C] this becomes [H,H] [C,C,C,C] * If given say [H,C,C] [H,C,F] this becomes [H],[C,C][H][C][F] * as [H,C,F] is higher priority than [H,C,C] so all its atoms must be evaluated first - * The original neighbours list is assumed to have been presorted. - * @param neighbours + * The input lists of neighbours are assumed to have been presorted. 
+ * @param neighbourLists */ - private List>> formListsWithSamePriority(List>> neighbours) { - List>> updatedNeighbours = new LinkedList>>(); - List> listsToRemove = new ArrayList>(); - for (List> neighbourLists : neighbours) { - List> updatedNeighbourLists = new LinkedList>(); - for (int i = 0; i < neighbourLists.size(); i++) { + private List> formListsWithSamePriority(List> neighbourLists) { + int intialNeighbourListCount = neighbourLists.size(); + if (intialNeighbourListCount > 1) { + List> listsToRemove = new ArrayList>(); + for (int i = 0; i < intialNeighbourListCount; i++) { List> neighbourListsToCombine = new ArrayList>(); List primaryAtomList = neighbourLists.get(i); - for (int j = i +1; j < neighbourLists.size(); j++) { - if (atomListCIPComparator.compare(neighbourLists.get(i), neighbourLists.get(j))==0){ - neighbourListsToCombine.add(neighbourLists.get(j)); + for (int j = i + 1; j < intialNeighbourListCount; j++) { + List neighbourListToCompareWith = neighbourLists.get(j); + if (atomListCipComparator.compare(primaryAtomList, neighbourListToCompareWith) == 0) { + neighbourListsToCombine.add(neighbourListToCompareWith); + i++; + } + else { + break; } } for (List neighbourList: neighbourListsToCombine) { @@ -232,36 +237,32 @@ primaryAtomList.addAll(neighbourList); } } - for (List list : listsToRemove) { - neighbourLists.remove(list); - } - //lists of same priority have been combined e.g. 
[H,C,C] [H,C,C] -->[H,C,C,H,C,C] - for (int i = neighbourLists.size()-1; i >=0; i--) { - List neighbourList = neighbourLists.get(i); - Collections.sort(neighbourList, cipComparator); - AtomWithHistory lastAtom = null; - List currentAtomList = new ArrayList(); - for (int j = neighbourList.size() -1; j >=0; j--) { - AtomWithHistory a = neighbourList.get(j); - if (lastAtom !=null && compareByCipRules(lastAtom, a) !=0){ - if (!currentAtomList.isEmpty()){ - updatedNeighbourLists.add(0, currentAtomList); - } - currentAtomList =new ArrayList(); - currentAtomList.add(a); - } - else{ - currentAtomList.add(a); - } - lastAtom = a; - } - if (!currentAtomList.isEmpty()){ - updatedNeighbourLists.add(0, currentAtomList); + neighbourLists.removeAll(listsToRemove); + } + + List> updatedNeighbourLists = new ArrayList>(); + //lists of same priority have already been combined (see above) e.g. [H,C,C] [H,C,C] -->[H,C,C,H,C,C] + //now sort these combined lists by CIP priority + //then group atoms that have the same CIP priority + for (int i = 0, lstsLen = neighbourLists.size(); i < lstsLen; i++) { + List neighbourList = neighbourLists.get(i); + Collections.sort(neighbourList, cipComparator); + AtomWithHistory lastAtom = null; + List currentAtomList = new ArrayList(); + for (int j = 0, lstLen = neighbourList.size(); j < lstLen; j++) { + AtomWithHistory a = neighbourList.get(j); + if (lastAtom != null && compareByCipRules(lastAtom, a) != 0) { + updatedNeighbourLists.add(currentAtomList); + currentAtomList = new ArrayList(); } + currentAtomList.add(a); + lastAtom = a; + } + if (!currentAtomList.isEmpty()) { + updatedNeighbourLists.add(currentAtomList); } - updatedNeighbours.add(updatedNeighbourLists); } - return updatedNeighbours; + return updatedNeighbourLists; } @@ -270,8 +271,8 @@ * @author dl387 * */ - private class CIPComparator implements Comparator { - public int compare(AtomWithHistory a, AtomWithHistory b){ + private class CipComparator implements Comparator { + public int 
compare(AtomWithHistory a, AtomWithHistory b) { return compareByCipRules(a, b); } } @@ -281,25 +282,25 @@ * @author dl387 * */ - private class AtomListCIPComparator implements Comparator> { - public int compare(List a, List b){ + private class AtomListCipComparator implements Comparator> { + public int compare(List a, List b) { int aSize = a.size(); int bSize = b.size(); int differenceInSize = aSize - bSize; int maxCommonSize = aSize > bSize ? bSize : aSize; for (int i = 1; i <= maxCommonSize; i++) { - int difference = compareByCipRules(a.get(aSize -i), b.get(bSize -i)); - if (difference >0){ + int difference = compareByCipRules(a.get(aSize - i), b.get(bSize - i)); + if (difference > 0) { return 1; } - if (difference < 0){ + if (difference < 0) { return -1; } } - if (differenceInSize >0){ + if (differenceInSize > 0) { return 1; } - if (differenceInSize <0){ + if (differenceInSize < 0) { return -1; } return 0; @@ -311,39 +312,39 @@ * @author dl387 * */ - private class ListOfAtomListsCIPComparator implements Comparator>> { - public int compare(List> a, List> b){ + private class ListOfAtomListsCipComparator implements Comparator>> { + public int compare(List> a, List> b) { int aSize = a.size(); int bSize = b.size(); int differenceInSize = aSize - bSize; int maxCommonSize = aSize > bSize ? bSize : aSize; for (int i = 1; i <= maxCommonSize; i++) { - List aprime = a.get(aSize -i); - List bprime = b.get(bSize -i); + List aprime = a.get(aSize - i); + List bprime = b.get(bSize - i); int aprimeSize = aprime.size(); int bprimeSize = bprime.size(); int differenceInSizeprime = aprimeSize - bprimeSize; int maxCommonSizeprime = aprimeSize > bprimeSize ? 
bprimeSize : aprimeSize; for (int j = 1; j <= maxCommonSizeprime; j++) { - int difference = compareByCipRules(aprime.get(aprimeSize -j), bprime.get(bprimeSize -j)); - if (difference >0){ + int difference = compareByCipRules(aprime.get(aprimeSize - j), bprime.get(bprimeSize - j)); + if (difference > 0) { return 1; } - if (difference < 0){ + if (difference < 0) { return -1; } } - if (differenceInSizeprime >0){ + if (differenceInSizeprime > 0) { return 1; } - if (differenceInSizeprime <0){ + if (differenceInSizeprime < 0) { return -1; } } - if (differenceInSize >0){ + if (differenceInSize > 0) { return 1; } - if (differenceInSize <0){ + if (differenceInSize < 0) { return -1; } return 0; @@ -351,73 +352,56 @@ } /** - * Gets the neighbouring atoms bar the previous atoms + * Gets the neighbouring atoms bar the previous atom in CIP order * If the neighbouring atom has already been visited it is replaced with a ghost atom * Multiple bonds including those to previous atoms yield ghost atoms unless the bond goes to the chiral atom e.g. 
in a sulfoxide * @param atoms * @return */ - private List>> getNextAtomsWithAppropriateGhostAtoms(List atoms) { - List>> allNeighbours = new ArrayList>>(); - int counter =0; - Atom lastPreviousAtom = null; - for (int i = 0; i < atoms.size(); i++) { - AtomWithHistory atomWithHistory = atoms.get(i); - Atom atom = atomWithHistory.atom; - List visitedAtoms = atomWithHistory.visitedAtoms; - Atom previousAtom = visitedAtoms.get(visitedAtoms.size()-1); - List visitedAtomsIncludingCurrentAtom = new ArrayList(visitedAtoms); - visitedAtomsIncludingCurrentAtom.add(atom); - - List neighboursWithHistory = new ArrayList(); - for(Bond b : atom.getBonds()) { - Atom atomBondConnectsTo = b.getOtherAtom(atom); - if (!atomBondConnectsTo.equals(chiralAtom)){//P-91.1.4.2.4 (higher order bonds to chiral centre do not involve duplication of atoms) - for (int j = b.getOrder(); j >1; j--) {//add ghost atoms to represent higher order bonds - Atom ghost = new Atom(atomBondConnectsTo.getElement()); - if (rule > 0){ - int indexOfOriginalAtom = visitedAtoms.indexOf(atomBondConnectsTo); - if (indexOfOriginalAtom != -1){ - neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, indexOfOriginalAtom)); - } - else{ - neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, visitedAtoms.size() + 1)); - } + private List getNextAtomsWithAppropriateGhostAtoms(AtomWithHistory atomWithHistory) { + Atom atom = atomWithHistory.atom; + List visitedAtoms = atomWithHistory.visitedAtoms; + Atom previousAtom = visitedAtoms.get(visitedAtoms.size()-1); + List visitedAtomsIncludingCurrentAtom = new ArrayList(visitedAtoms); + visitedAtomsIncludingCurrentAtom.add(atom); + + List neighboursWithHistory = new ArrayList(); + for(Bond b : atom.getBonds()) { + Atom atomBondConnectsTo = b.getOtherAtom(atom); + if (!atomBondConnectsTo.equals(chiralAtom)) {//P-91.1.4.2.4 (higher order bonds to chiral centre do not involve duplication of atoms) + for (int j = 
b.getOrder(); j >1; j--) {//add ghost atoms to represent higher order bonds + Atom ghost = new Atom(atomBondConnectsTo.getElement()); + if (rule > 0) { + int indexOfOriginalAtom = visitedAtoms.indexOf(atomBondConnectsTo); + if (indexOfOriginalAtom != -1) { + neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, indexOfOriginalAtom)); } else{ - neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, null)); + neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, visitedAtoms.size() + 1)); } } + else{ + neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, null)); + } } - if (!atomBondConnectsTo.equals(previousAtom)){ - if (visitedAtoms.contains(atomBondConnectsTo)){//cycle detected, add ghost atom instead - Atom ghost = new Atom(atomBondConnectsTo.getElement()); - if (rule > 0){ - neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, visitedAtoms.indexOf(atomBondConnectsTo))); - } - else{ - neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, null)); - } + } + if (!atomBondConnectsTo.equals(previousAtom)) { + if (visitedAtoms.contains(atomBondConnectsTo)) {//cycle detected, add ghost atom instead + Atom ghost = new Atom(atomBondConnectsTo.getElement()); + if (rule > 0) { + neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, visitedAtoms.indexOf(atomBondConnectsTo))); } else{ - neighboursWithHistory.add(new AtomWithHistory(atomBondConnectsTo, visitedAtomsIncludingCurrentAtom, null)); + neighboursWithHistory.add(new AtomWithHistory(ghost, visitedAtomsIncludingCurrentAtom, null)); } } + else{ + neighboursWithHistory.add(new AtomWithHistory(atomBondConnectsTo, visitedAtomsIncludingCurrentAtom, null)); + } } - Collections.sort(neighboursWithHistory, cipComparator); - if (lastPreviousAtom==null){ - lastPreviousAtom = previousAtom; - } - else if 
(lastPreviousAtom !=previousAtom){ - lastPreviousAtom = previousAtom; - counter++; - } - if (allNeighbours.size() <= counter){ - allNeighbours.add(new ArrayList>()); - } - allNeighbours.get(counter).add(neighboursWithHistory); } - return allNeighbours; + Collections.sort(neighboursWithHistory, cipComparator); + return neighboursWithHistory; } /** @@ -426,53 +410,53 @@ * @param b * @return */ - private int compareByCipRules(AtomWithHistory a, AtomWithHistory b){ + private int compareByCipRules(AtomWithHistory a, AtomWithHistory b) { //rule 1a //prefer higher atomic number - int atomicNumber1 = AtomProperties.elementToAtomicNumber.get(a.atom.getElement()); - int atomicNumber2 = AtomProperties.elementToAtomicNumber.get(b.atom.getElement()); - if (atomicNumber1 > atomicNumber2){ + int atomicNumber1 = a.atom.getElement().ATOMIC_NUM; + int atomicNumber2 = b.atom.getElement().ATOMIC_NUM; + if (atomicNumber1 > atomicNumber2) { return 1; } - else if (atomicNumber1 < atomicNumber2){ + else if (atomicNumber1 < atomicNumber2) { return -1; } - if (rule > 0){ + if (rule > 0) { //rule 1b //prefer duplicate to non-duplicate Integer indexFromRoot1 = a.indexOfOriginalFromRoot; Integer indexFromRoot2 = b.indexOfOriginalFromRoot; - if (indexFromRoot1 != null && indexFromRoot2 == null){ + if (indexFromRoot1 != null && indexFromRoot2 == null) { return 1; } - if (indexFromRoot1 == null && indexFromRoot2 != null){ + if (indexFromRoot1 == null && indexFromRoot2 != null) { return -1; } //prefer duplicate of node closer to root - if (indexFromRoot1 != null && indexFromRoot2 != null){ - if (indexFromRoot1 < indexFromRoot2 ){ + if (indexFromRoot1 != null && indexFromRoot2 != null) { + if (indexFromRoot1 < indexFromRoot2 ) { return 1; } - if (indexFromRoot1 > indexFromRoot2 ){ + if (indexFromRoot1 > indexFromRoot2 ) { return -1; } } - if (rule > 1){ + if (rule > 1) { //rule 2 //prefer higher atomic mass Integer atomicMass1 = a.atom.getIsotope(); Integer atomicMass2 = b.atom.getIsotope(); - if 
(atomicMass1 != null && atomicMass2 == null){ + if (atomicMass1 != null && atomicMass2 == null) { return 1; } - else if (atomicMass1 == null && atomicMass2 != null){ + else if (atomicMass1 == null && atomicMass2 != null) { return -1; } - else if (atomicMass1 != null && atomicMass2 != null){ - if (atomicMass1 > atomicMass2){ + else if (atomicMass1 != null && atomicMass2 != null) { + if (atomicMass1 > atomicMass2) { return 1; } - else if (atomicMass1 < atomicMass2){ + else if (atomicMass1 < atomicMass2) { return -1; } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CMLWriter.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CMLWriter.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CMLWriter.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CMLWriter.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,201 @@ +package uk.ac.cam.ch.wwmm.opsin; + +import java.io.ByteArrayOutputStream; +import java.io.UnsupportedEncodingException; +import java.util.List; + +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; + +import com.ctc.wstx.api.WstxOutputProperties; +import com.ctc.wstx.stax.WstxOutputFactory; + +class CMLWriter { + /** + * CML Elements/Attributes/NameSpace + */ + static final String CML_NAMESPACE = "http://www.xml-cml.org/schema"; + + private static final XMLOutputFactory factory = new WstxOutputFactory(); + static { + factory.setProperty(WstxOutputProperties.P_OUTPUT_ESCAPE_CR, false); + } + + /**The XML writer*/ + private final XMLStreamWriter writer; + + /** + * Creates a CML writer for the given fragment + * @param writer + + */ + CMLWriter(XMLStreamWriter writer) { + this.writer = writer; + } + + static String generateCml(Fragment structure, String chemicalName) { + return generateCml(structure, chemicalName, false); + } + + static String generateIndentedCml(Fragment 
structure, String chemicalName) { + return generateCml(structure, chemicalName, true); + } + + private static String generateCml(Fragment structure, String chemicalName, boolean indent) { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try { + XMLStreamWriter xmlWriter = factory.createXMLStreamWriter(out, "UTF-8"); + if (indent) { + xmlWriter = new IndentingXMLStreamWriter(xmlWriter, 2); + } + CMLWriter cmlWriter = new CMLWriter(xmlWriter); + cmlWriter.writeCmlStart(); + cmlWriter.writeMolecule(structure, chemicalName, 1); + cmlWriter.writeCmlEnd(); + xmlWriter.close(); + } catch (XMLStreamException e) { + throw new RuntimeException(e); + } + try { + return out.toString("UTF-8"); + } catch (UnsupportedEncodingException e) { + throw new RuntimeException("JVM doesn't support UTF-8...but it should do!"); + } + } + + void writeCmlStart(){ + try { + writer.writeStartElement("cml"); + writer.writeDefaultNamespace(CML_NAMESPACE); + writer.writeAttribute("convention", "conventions:molecular"); + writer.writeNamespace("conventions", "http://www.xml-cml.org/convention/"); + writer.writeNamespace("cmlDict", "http://www.xml-cml.org/dictionary/cml/"); + writer.writeNamespace("nameDict", "http://www.xml-cml.org/dictionary/cml/name/"); + } catch (XMLStreamException e) { + throw new RuntimeException(e); + } + } + + void writeCmlEnd(){ + try { + writer.writeEndElement(); + writer.flush(); + } catch (XMLStreamException e) { + throw new RuntimeException(e); + } + } + + void writeMolecule(Fragment structure, String chemicalName, int id) throws XMLStreamException { + writer.writeStartElement("molecule"); + writer.writeAttribute("id", "m" + id); + + writer.writeStartElement("name"); + writer.writeAttribute("dictRef", "nameDict:unknown"); + writer.writeCharacters(chemicalName); + writer.writeEndElement(); + + if (structure != null) { + writer.writeStartElement("atomArray"); + for(Atom atom : structure.getAtomList()) { + writeAtom(atom); + } + writer.writeEndElement(); + + 
writer.writeStartElement("bondArray"); + for(Bond bond : structure.getBondSet()) { + writeBond(bond); + } + writer.writeEndElement(); + } + + writer.writeEndElement(); + } + + private void writeAtom(Atom atom) throws XMLStreamException { + writer.writeStartElement("atom"); + writer.writeAttribute("id", "a" + Integer.toString(atom.getID())); + writer.writeAttribute("elementType", atom.getElement().toString()); + if(atom.getCharge() != 0){ + writer.writeAttribute("formalCharge", Integer.toString(atom.getCharge())); + } + if(atom.getIsotope() != null){ + writer.writeAttribute("isotopeNumber", Integer.toString(atom.getIsotope())); + } + if (atom.getElement() != ChemEl.H){ + int hydrogenCount =0; + List neighbours = atom.getAtomNeighbours(); + for (Atom neighbour : neighbours) { + if (neighbour.getElement() == ChemEl.H){ + hydrogenCount++; + } + } + if (hydrogenCount==0){//prevent adding of implicit hydrogen + writer.writeAttribute("hydrogenCount", "0"); + } + } + AtomParity atomParity = atom.getAtomParity(); + if(atomParity != null){ + writeAtomParity(atomParity); + } + for(String locant : atom.getLocants()) { + writer.writeStartElement("label"); + writer.writeAttribute("value", locant); + writer.writeAttribute("dictRef", "cmlDict:locant"); + writer.writeEndElement(); + } + writer.writeEndElement(); + } + + private void writeAtomParity(AtomParity atomParity) throws XMLStreamException { + writer.writeStartElement("atomParity"); + writeAtomRefs4(atomParity.getAtomRefs4()); + writer.writeCharacters(Integer.toString(atomParity.getParity())); + writer.writeEndElement(); + } + + private void writeBond(Bond bond) throws XMLStreamException { + writer.writeStartElement("bond"); + writer.writeAttribute("id", "a" + Integer.toString(bond.getFrom()) + "_a" + Integer.toString(bond.getTo())); + writer.writeAttribute("atomRefs2", "a" + Integer.toString(bond.getFrom()) + " a" + Integer.toString(bond.getTo())); + switch (bond.getOrder()) { + case 1: + writer.writeAttribute("order", 
"S"); + break; + case 2: + writer.writeAttribute("order", "D"); + break; + case 3: + writer.writeAttribute("order", "T"); + break; + default: + writer.writeAttribute("order", "unknown"); + break; + } + BondStereo bondStereo = bond.getBondStereo(); + if (bondStereo != null){ + writeBondStereo(bondStereo); + } + writer.writeEndElement(); + } + + private void writeBondStereo(BondStereo bondStereo) throws XMLStreamException { + writer.writeStartElement("bondStereo"); + writeAtomRefs4(bondStereo.getAtomRefs4()); + writer.writeCharacters(bondStereo.getBondStereoValue().toString()); + writer.writeEndElement(); + } + + private void writeAtomRefs4(Atom[] atomRefs4) throws XMLStreamException { + StringBuilder atomRefsSb = new StringBuilder(); + for(int i = 0; i< atomRefs4.length - 1; i++) { + atomRefsSb.append('a'); + atomRefsSb.append(atomRefs4[i].getID()); + atomRefsSb.append(' '); + } + atomRefsSb.append('a'); + atomRefsSb.append(atomRefs4[atomRefs4.length - 1].getID()); + writer.writeAttribute("atomRefs4", atomRefsSb.toString()); + } + +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerationException.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerationException.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerationException.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerationException.java 2017-07-23 20:55:18.000000000 +0000 @@ -11,22 +11,18 @@ ComponentGenerationException() { super(); - // TODO Auto-generated constructor stub } ComponentGenerationException(String message) { super(message); - // TODO Auto-generated constructor stub } ComponentGenerationException(String message, Throwable cause) { super(message, cause); - // TODO Auto-generated constructor stub } ComponentGenerationException(Throwable cause) { super(cause); - // TODO Auto-generated constructor stub } } diff -Nru 
opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerator.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerator.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerator.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentGenerator.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,22 +1,20 @@ package uk.ac.cam.ch.wwmm.opsin; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.Deque; import java.util.HashMap; -import java.util.LinkedList; import java.util.List; +import java.util.Locale; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; -import nu.xom.Attribute; -import nu.xom.Element; -import nu.xom.Elements; -import nu.xom.Node; - /**Does destructive procedural parsing on parser results. * * @author ptc24 @@ -69,42 +67,43 @@ } //match a fusion bracket with only numerical locants. 
If this is followed by a HW group it probably wasn't a fusion bracket - private final static Pattern matchNumberLocantsOnlyFusionBracket = Pattern.compile("\\[\\d+(,\\d+)*\\]"); - private final static Pattern matchCommaOrDot =Pattern.compile("[\\.,]"); - private final static Pattern matchAnnulene = Pattern.compile("[\\[\\(\\{]([1-9]\\d*)[\\]\\)\\}]annulen"); - private final static String elementSymbols ="(?:He|Li|Be|B|C|N|O|F|Ne|Na|Mg|Al|Si|P|S|Cl|Ar|K|Ca|Sc|Ti|V|Cr|Mn|Fe|Co|Ni|Cu|Zn|Ga|Ge|As|Se|Br|Kr|Rb|Sr|Y|Zr|Nb|Mo|Tc|Ru|Rh|Pd|Ag|Cd|In|Sn|Sb|Te|I|Xe|Cs|Ba|La|Ce|Pr|Nd|Pm|Sm|Eu|Gd|Tb|Dy|Ho|Er|Tm|Yb|Lu|Hf|Ta|W|Re|Os|Ir|Pt|Au|Hg|Tl|Pb|Po|At|Rn|Fr|Ra|Ac|Th|Pa|U|Np|Pu|Am|Cm|Bk|Cf|Es|Fm|Md|No|Lr|Rf|Db|Sg|Bh|Hs|Mt|Ds)"; - private final static Pattern matchStereochemistry = Pattern.compile("(.*?)(SR|RS|[RSEZrsezabx]|[cC][iI][sS]|[tT][rR][aA][nN][sS]|[aA][lL][pP][hH][aA]|[bB][eE][tT][aA]|[xX][iI])"); - private final static Pattern matchStar = Pattern.compile("\\^?\\*"); - private final static Pattern matchRS = Pattern.compile("[RSrs]"); - private final static Pattern matchEZ = Pattern.compile("[EZez]"); - private final static Pattern matchAlphaBetaStereochem = Pattern.compile("a|b|x|[aA][lL][pP][hH][aA]|[bB][eE][tT][aA]|[xX][iI]"); - private final static Pattern matchCisTrans = Pattern.compile("[cC][iI][sS]|[tT][rR][aA][nN][sS]"); - private final static Pattern matchLambdaConvention = Pattern.compile("(\\S+)?lambda\\D*(\\d+)\\D*", Pattern.CASE_INSENSITIVE); - private final static Pattern matchCommaOrDash =Pattern.compile("[,-]"); - private final static Pattern matchHdigit =Pattern.compile("H\\d"); - private final static Pattern matchDigit =Pattern.compile("\\d+"); - private final static Pattern matchNonDigit =Pattern.compile("\\D+"); - private final static Pattern matchSuperscriptedLocant = Pattern.compile("(" + elementSymbols +"'*)[\\^\\[\\(\\{~]*(?:[sS][uU][pP][ ]?)?([^\\^\\[\\(\\{~\\]\\)\\}]+)[^\\[\\(\\{]*"); - private final static Pattern matchIUPAC2004ElementLocant = 
Pattern.compile("(\\d+'*)-(" + elementSymbols +"'*)(.*)"); - private final static Pattern matchBracketAtEndOfLocant = Pattern.compile("-?[\\[\\(\\{](.*)[\\]\\)\\}]$"); - private final static Pattern matchGreek = Pattern.compile("alpha|beta|gamma|delta|epsilon|zeta|eta|omega", Pattern.CASE_INSENSITIVE); - private final static Pattern matchInlineSuffixesThatAreAlsoGroups = Pattern.compile("carbonyl|oxy|sulfenyl|sulfinyl|sulfonyl|selenenyl|seleninyl|selenonyl|tellurenyl|tellurinyl|telluronyl"); + private static final Pattern matchNumberLocantsOnlyFusionBracket = Pattern.compile("\\[\\d+(,\\d+)*\\]"); + private static final Pattern matchCommaOrDot =Pattern.compile("[\\.,]"); + private static final Pattern matchAnnulene = Pattern.compile("[\\[\\(\\{]([1-9]\\d*)[\\]\\)\\}]annulen"); + private static final String elementSymbols ="(?:He|Li|Be|B|C|N|O|F|Ne|Na|Mg|Al|Si|P|S|Cl|Ar|K|Ca|Sc|Ti|V|Cr|Mn|Fe|Co|Ni|Cu|Zn|Ga|Ge|As|Se|Br|Kr|Rb|Sr|Y|Zr|Nb|Mo|Tc|Ru|Rh|Pd|Ag|Cd|In|Sn|Sb|Te|I|Xe|Cs|Ba|La|Ce|Pr|Nd|Pm|Sm|Eu|Gd|Tb|Dy|Ho|Er|Tm|Yb|Lu|Hf|Ta|W|Re|Os|Ir|Pt|Au|Hg|Tl|Pb|Po|At|Rn|Fr|Ra|Ac|Th|Pa|U|Np|Pu|Am|Cm|Bk|Cf|Es|Fm|Md|No|Lr|Rf|Db|Sg|Bh|Hs|Mt|Ds)"; + private static final Pattern matchStereochemistry = Pattern.compile("(.*?)(SR|R/?S|r/?s|[Ee][Zz]|[RSEZrsezabx]|[cC][iI][sS]|[tT][rR][aA][nN][sS]|[aA][lL][pP][hH][aA]|[bB][eE][tT][aA]|[xX][iI]|[eE][xX][oO]|[eE][nN][dD][oO]|[sS][yY][nN]|[aA][nN][tT][iI]|M|P|Ra|Sa|Sp|Rp)"); + private static final Pattern matchStar = Pattern.compile("\\^?\\*"); + private static final Pattern matchRacemic = Pattern.compile("rac(\\.|em(\\.|ic)?)?-?", Pattern.CASE_INSENSITIVE); + private static final Pattern matchRS = Pattern.compile("[Rr]/?[Ss]?|[Ss][Rr]?"); + private static final Pattern matchEZ = Pattern.compile("[EZez]|[Ee][Zz]"); + private static final Pattern matchAlphaBetaStereochem = Pattern.compile("a|b|x|[aA][lL][pP][hH][aA]|[bB][eE][tT][aA]|[xX][iI]"); + private static final Pattern matchCisTrans = 
Pattern.compile("[cC][iI][sS]|[tT][rR][aA][nN][sS]"); + private static final Pattern matchEndoExoSynAnti = Pattern.compile("[eE][xX][oO]|[eE][nN][dD][oO]|[sS][yY][nN]|[aA][nN][tT][iI]"); + private static final Pattern matchAxialStereo = Pattern.compile("M|P|Ra|Sa|Sp|Rp"); + private static final Pattern matchLambdaConvention = Pattern.compile("(\\S+)?lambda\\D*(\\d+)\\D*", Pattern.CASE_INSENSITIVE); + private static final Pattern matchHdigit =Pattern.compile("H\\d"); + private static final Pattern matchDigit =Pattern.compile("\\d+"); + private static final Pattern matchNonDigit =Pattern.compile("\\D+"); + private static final Pattern matchSuperscriptedLocant = Pattern.compile("(" + elementSymbols +"'*)[\\^\\[\\(\\{~\\*\\<]*(?:[sS][uU][pP][ ]?)?([^\\^\\[\\(\\{~\\*\\<\\]\\)\\}\\>]+)[^\\[\\(\\{]*"); + private static final Pattern matchIUPAC2004ElementLocant = Pattern.compile("(\\d+'*)-(" + elementSymbols +"'*)(.*)"); + private static final Pattern matchBracketAtEndOfLocant = Pattern.compile("-?[\\[\\(\\{](.*)[\\]\\)\\}]$"); + private static final Pattern matchGreek = Pattern.compile("alpha|beta|gamma|delta|epsilon|zeta|eta|omega", Pattern.CASE_INSENSITIVE); + private static final Pattern matchInlineSuffixesThatAreAlsoGroups = Pattern.compile("carbonyl|oxy|sulfenyl|sulfinyl|sulfonyl|selenenyl|seleninyl|selenonyl|tellurenyl|tellurinyl|telluronyl"); private final NameToStructureConfig n2sConfig; - private final Element parse; - public ComponentGenerator(NameToStructureConfig n2sConfig, Element parse) { + ComponentGenerator(NameToStructureConfig n2sConfig) { this.n2sConfig = n2sConfig; - this.parse = parse; } /** * Processes a parse result destructively adding semantic information by processing the various micro syntaxes. 
+ * @param parse * @throws ComponentGenerationException */ - void processParse() throws ComponentGenerationException { - List substituentsAndRoot = XOMTools.getDescendantElementsWithTagNames(parse, new String[]{SUBSTITUENT_EL, ROOT_EL}); + void processParse(Element parse) throws ComponentGenerationException { + List substituentsAndRoot = OpsinTools.getDescendantElementsWithTagNames(parse, new String[]{SUBSTITUENT_EL, ROOT_EL}); for (Element subOrRoot: substituentsAndRoot) { /* Throws exceptions for occurrences that are ambiguous and this parse has picked the incorrect interpretation */ @@ -121,11 +120,11 @@ processSuffixPrefixes(subOrRoot); processLambdaConvention(subOrRoot); } - List groups = XOMTools.getDescendantElementsWithTagName(parse, GROUP_EL); + List groups = OpsinTools.getDescendantElementsWithTagName(parse, GROUP_EL); /* Converts open/close bracket elements to bracket elements and * places the elements inbetween within the newly created bracket */ - while(findAndStructureBrackets(substituentsAndRoot)); + findAndStructureBrackets(substituentsAndRoot); for (Element subOrRoot: substituentsAndRoot) { processHydroCarbonRings(subOrRoot); @@ -144,22 +143,22 @@ * @throws ComponentGenerationException */ static void resolveAmbiguities(Element subOrRoot) throws ComponentGenerationException { - List multipliers = XOMTools.getChildElementsWithTagName(subOrRoot, MULTIPLIER_EL); + List multipliers = subOrRoot.getChildElements(MULTIPLIER_EL); for (Element apparentMultiplier : multipliers) { if (!BASIC_TYPE_VAL.equals(apparentMultiplier.getAttributeValue(TYPE_ATR)) && !VONBAEYER_TYPE_VAL.equals(apparentMultiplier.getAttributeValue(TYPE_ATR))){ continue; } int multiplierNum = Integer.parseInt(apparentMultiplier.getAttributeValue(VALUE_ATR)); - Element nextEl = (Element)XOMTools.getNextSibling(apparentMultiplier); + Element nextEl = OpsinTools.getNextSibling(apparentMultiplier); if (multiplierNum >=3){//detects ambiguous use of things like tetradeca if(nextEl !=null){ - if 
(nextEl.getLocalName().equals(ALKANESTEMCOMPONENT)){//can ignore the trivial alkanes as ambiguity does not exist for them + if (nextEl.getName().equals(ALKANESTEMCOMPONENT)){//can ignore the trivial alkanes as ambiguity does not exist for them int alkaneChainLength = Integer.parseInt(nextEl.getAttributeValue(VALUE_ATR)); if (alkaneChainLength >=10 && alkaneChainLength > multiplierNum){ - Element isThisALocant =(Element)XOMTools.getPreviousSibling(apparentMultiplier); + Element isThisALocant = OpsinTools.getPreviousSibling(apparentMultiplier); if (isThisALocant == null || - !isThisALocant.getLocalName().equals(LOCANT_EL) || - MATCH_COMMA.split(isThisALocant.getValue()).length != multiplierNum){ + !isThisALocant.getName().equals(LOCANT_EL) || + isThisALocant.getValue().split(",").length != multiplierNum){ throw new ComponentGenerationException(apparentMultiplier.getValue() + nextEl.getValue() +" should not have been lexed as two tokens!"); } } @@ -167,12 +166,12 @@ } } - if (multiplierNum >=4 && nextEl !=null && nextEl.getLocalName().equals(HYDROCARBONFUSEDRINGSYSTEM_EL) && nextEl.getValue().equals("phen") && !"e".equals(nextEl.getAttributeValue(SUBSEQUENTUNSEMANTICTOKEN_ATR))){//deals with tetra phenyl vs tetraphen yl - Element possibleLocantOrMultiplierOrSuffix = (Element) XOMTools.getNextSibling(nextEl); + if (multiplierNum >=4 && nextEl !=null && nextEl.getName().equals(HYDROCARBONFUSEDRINGSYSTEM_EL) && nextEl.getValue().equals("phen") && !"e".equals(nextEl.getAttributeValue(SUBSEQUENTUNSEMANTICTOKEN_ATR))){//deals with tetra phenyl vs tetraphen yl + Element possibleLocantOrMultiplierOrSuffix = OpsinTools.getNextSibling(nextEl); if (possibleLocantOrMultiplierOrSuffix!=null){//null if not used as substituent - if (possibleLocantOrMultiplierOrSuffix.getLocalName().equals(SUFFIX_EL)){//for phen the aryl substituent, expect an adjacent suffix e.g. 
phenyl, phenoxy - Element isThisALocant =(Element)XOMTools.getPreviousSibling(apparentMultiplier); - if (isThisALocant == null || !isThisALocant.getLocalName().equals(LOCANT_EL) || MATCH_COMMA.split(isThisALocant.getValue()).length != 1){ + if (possibleLocantOrMultiplierOrSuffix.getName().equals(SUFFIX_EL)){//for phen the aryl substituent, expect an adjacent suffix e.g. phenyl, phenoxy + Element isThisALocant = OpsinTools.getPreviousSibling(apparentMultiplier); + if (isThisALocant == null || !isThisALocant.getName().equals(LOCANT_EL) || isThisALocant.getValue().split(",").length != 1){ String multiplierAndGroup =apparentMultiplier.getValue() + nextEl.getValue(); throw new ComponentGenerationException(multiplierAndGroup +" should not have been lexed as one token!"); } @@ -180,31 +179,31 @@ } } if (multiplierNum > 4 && !apparentMultiplier.getValue().endsWith("a")){//disambiguate pent oxy and the like. Assume it means pentanoxy rather than 5 oxys - if (nextEl !=null && nextEl.getLocalName().equals(GROUP_EL)&& matchInlineSuffixesThatAreAlsoGroups.matcher(nextEl.getValue()).matches()){ + if (nextEl !=null && nextEl.getName().equals(GROUP_EL)&& matchInlineSuffixesThatAreAlsoGroups.matcher(nextEl.getValue()).matches()){ throw new ComponentGenerationException(apparentMultiplier.getValue() + nextEl.getValue() +" should have been lexed as [alkane stem, inline suffix], not [multiplier, group]!"); } } } - List fusions = XOMTools.getChildElementsWithTagName(subOrRoot, FUSION_EL); + List fusions = subOrRoot.getChildElements(FUSION_EL); for (Element fusion : fusions) { String fusionText = fusion.getValue(); if (matchNumberLocantsOnlyFusionBracket.matcher(fusionText).matches()){ - Element possibleHWRing = XOMTools.getNextSiblingIgnoringCertainElements(fusion, new String[]{MULTIPLIER_EL, HETEROATOM_EL}); + Element possibleHWRing = OpsinTools.getNextSiblingIgnoringCertainElements(fusion, new String[]{MULTIPLIER_EL, HETEROATOM_EL}); if (possibleHWRing !=null && 
HANTZSCHWIDMAN_SUBTYPE_VAL.equals(possibleHWRing.getAttributeValue(SUBTYPE_ATR))){ int heteroCount = 0; int multiplierValue = 1; - Element currentElem = (Element) XOMTools.getNextSibling(fusion); - while(currentElem != null && !currentElem.getLocalName().equals(GROUP_EL)){ - if(currentElem.getLocalName().equals(HETEROATOM_EL)) { + Element currentElem = OpsinTools.getNextSibling(fusion); + while(currentElem != null && !currentElem.getName().equals(GROUP_EL)){ + if(currentElem.getName().equals(HETEROATOM_EL)) { heteroCount+=multiplierValue; multiplierValue =1; - } else if (currentElem.getLocalName().equals(MULTIPLIER_EL)){ + } else if (currentElem.getName().equals(MULTIPLIER_EL)){ multiplierValue = Integer.parseInt(currentElem.getAttributeValue(VALUE_ATR)); } - currentElem = (Element)XOMTools.getNextSibling(currentElem); + currentElem = OpsinTools.getNextSibling(currentElem); } - String[] locants = MATCH_COMMA.split(fusionText.substring(1, fusionText.length()-1)); + String[] locants = fusionText.substring(1, fusionText.length() - 1).split(","); if (locants.length == heteroCount){ boolean foundLocantNotInHwSystem =false; for (String locant : locants) { @@ -235,64 +234,70 @@ * @throws ComponentGenerationException */ static void processLocants(Element subOrRoot) throws ComponentGenerationException { - List locants = XOMTools.getChildElementsWithTagName(subOrRoot, LOCANT_EL); + List locants = subOrRoot.getChildElements(LOCANT_EL); for (Element locant : locants) { List individualLocants = splitIntoIndividualLocants(StringTools.removeDashIfPresent(locant.getValue())); - for (int i = 0; i < individualLocants.size(); i++) { - String locantText =individualLocants.get(i); + for (int i = 0, locantCount = individualLocants.size(); i < locantCount; i++) { + String locantText = individualLocants.get(i); - if (locantText.contains("-")){//avoids this regex being invoked typically + if (locantText.contains("-")) {//avoids this regex being invoked typically //rearranges locant to the 
older equivalent form - Matcher m= matchIUPAC2004ElementLocant.matcher(locantText); + Matcher m = matchIUPAC2004ElementLocant.matcher(locantText); if (m.matches()){ locantText = m.group(2) + m.group(1) + m.group(3); } } - if (Character.isLetter(locantText.charAt(0))){ + if (Character.isLetter(locantText.charAt(0))) { //remove indications of superscript as the fact a locant is superscripted can be determined from context e.g. N~1~ ->N1 Matcher m = matchSuperscriptedLocant.matcher(locantText); - if (m.lookingAt()){ - String replacementString = m.group(1) +m.group(2); + if (m.lookingAt()) { + String replacementString = m.group(1) + m.group(2); locantText = m.replaceFirst(replacementString); } - if (locantText.length()>=3){ + if (locantText.length() >= 3){ //convert greeks to lower case m = matchGreek.matcher(locantText); while (m.find()) { - locantText = locantText.substring(0, m.start()) + m.group().toLowerCase() + locantText.substring(m.end()); + locantText = locantText.substring(0, m.start()) + m.group().toLowerCase(Locale.ROOT) + locantText.substring(m.end()); } } } - char lastChar = locantText.charAt(locantText.length()-1); - if(lastChar == ')' || lastChar == ']' || lastChar == '}') { + char lastChar = locantText.charAt(locantText.length() - 1); + if(lastChar == ')' || lastChar == ']' || lastChar == '}') { //stereochemistry or added hydrogen that result from the application of this locant as a locant for a substituent may be included in brackets after the locant Matcher m = matchBracketAtEndOfLocant.matcher(locantText); - if (m.find()){ + if (m.find()) { String brackettedText = m.group(1); - if (StringTools.endsWithCaseInsensitive(brackettedText, "H")){ + if (StringTools.endsWithCaseInsensitive(brackettedText, "H")) { locantText = m.replaceFirst("");//strip the bracket from the locantText //create individual tags for added hydrogen. 
Examples of bracketed text include "9H" or "2H,7H" - String[] addedHydrogens = MATCH_COMMA.split(brackettedText); + String[] addedHydrogens = brackettedText.split(","); for (String addedHydrogen : addedHydrogens) { - Element addedHydrogenElement=new Element(ADDEDHYDROGEN_EL); - addedHydrogenElement.addAttribute(new Attribute(LOCANT_ATR, addedHydrogen.substring(0, addedHydrogen.length()-1))); - XOMTools.insertBefore(locant, addedHydrogenElement); + Element addedHydrogenElement = new TokenEl(ADDEDHYDROGEN_EL); + addedHydrogenElement.addAttribute(new Attribute(LOCANT_ATR, addedHydrogen.substring(0, addedHydrogen.length() - 1))); + OpsinTools.insertBefore(locant, addedHydrogenElement); } - if (locant.getAttribute(TYPE_ATR)==null){ + if (locant.getAttribute(TYPE_ATR) == null){ locant.addAttribute(new Attribute(TYPE_ATR, ADDEDHYDROGENLOCANT_TYPE_VAL));//this locant must not be used as an indirect locant } } else if (StringTools.endsWithCaseInsensitive(brackettedText, "R") || StringTools.endsWithCaseInsensitive(brackettedText, "S")){ locantText = m.replaceFirst("");//strip the bracket from the locantText - String rs = brackettedText; - Element newStereoChemEl = new Element(STEREOCHEMISTRY_EL); - newStereoChemEl.appendChild("(" + locantText +rs+")"); + String rs; + if (brackettedText.length() == 3 && (brackettedText.charAt(1) ==','|| brackettedText.charAt(1) =='/')) { + rs = new StringBuilder(2).append(brackettedText.charAt(0)).append(brackettedText.charAt(2)).toString(); + } + else { + rs = brackettedText; + } + + Element newStereoChemEl = new TokenEl(STEREOCHEMISTRY_EL, "(" + locantText + rs + ")"); newStereoChemEl.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - XOMTools.insertBefore(locant, newStereoChemEl); + OpsinTools.insertBefore(locant, newStereoChemEl); } - else if (matchDigit.matcher(brackettedText).matches()){ + else if (matchDigit.matcher(brackettedText).matches()) { //compounds locant e.g. 1(10). 
Leave as is, it will be handled by the function that handles unsaturation } else{ @@ -307,14 +312,14 @@ individualLocants.set(i, locantText); } - XOMTools.setTextChild(locant, StringTools.stringListToString(individualLocants, ",")); + locant.setValue(StringTools.stringListToString(individualLocants, ",")); - Element afterLocants = (Element)XOMTools.getNextSibling(locant); - if(afterLocants == null){ + Element afterLocants = OpsinTools.getNextSibling(locant); + if(afterLocants == null) { throw new ComponentGenerationException("Nothing after locant tag: " + locant.toXML()); } - if (individualLocants.size()==1){ + if (individualLocants.size() == 1) { ifCarbohydrateLocantConvertToAminoAcidStyleLocant(locant); } } @@ -327,33 +332,32 @@ * @param locant */ private static void ifCarbohydrateLocantConvertToAminoAcidStyleLocant(Element locant) { - if (MATCH_ELEMENT_SYMBOL.matcher(locant.getValue()).matches()){ - Element possibleMultiplier = (Element) XOMTools.getPreviousSibling(locant); - if (possibleMultiplier!=null && possibleMultiplier.getLocalName().equals(MULTIPLIER_EL)){ + if (MATCH_ELEMENT_SYMBOL.matcher(locant.getValue()).matches()) { + Element possibleMultiplier = OpsinTools.getPreviousSibling(locant); + if (possibleMultiplier != null && possibleMultiplier.getName().equals(MULTIPLIER_EL)) { int multiplierValue = Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR)); - Element possibleOtherLocant = (Element) XOMTools.getPreviousSibling(possibleMultiplier); - if (possibleOtherLocant!=null){ - String[] locantValues = MATCH_COMMA.split(possibleOtherLocant.getValue()); + Element possibleOtherLocant = OpsinTools.getPreviousSibling(possibleMultiplier); + if (possibleOtherLocant != null) { + String[] locantValues = possibleOtherLocant.getValue().split(","); if (locantValues.length == Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR))){ for (int i = 0; i < locantValues.length; i++) { locantValues[i] = locant.getValue() + locantValues[i]; } - 
XOMTools.setTextChild(possibleOtherLocant, StringTools.arrayToString(locantValues, ",")); + possibleOtherLocant.setValue(StringTools.arrayToString(locantValues, ",")); locant.detach(); } } else{ StringBuilder sb = new StringBuilder(); - for (int i = 0; i < multiplierValue-1; i++) { + for (int i = 0; i < multiplierValue - 1; i++) { sb.append(locant.getValue()); sb.append(StringTools.multiplyString("'", i)); sb.append(','); } sb.append(locant.getValue()); - sb.append(StringTools.multiplyString("'", multiplierValue-1)); - Element newLocant = new Element(LOCANT_EL); - newLocant.appendChild(sb.toString()); - XOMTools.insertBefore(possibleMultiplier, newLocant); + sb.append(StringTools.multiplyString("'", multiplierValue - 1)); + Element newLocant = new TokenEl(LOCANT_EL, sb.toString()); + OpsinTools.insertBefore(possibleMultiplier, newLocant); locant.detach(); } } @@ -397,24 +401,21 @@ * @throws ComponentGenerationException */ private void convertOrthoMetaParaToLocants(Element subOrRoot) throws ComponentGenerationException{ - List ompLocants = XOMTools.getChildElementsWithTagName(subOrRoot, ORTHOMETAPARA_EL); + List ompLocants = subOrRoot.getChildElements(ORTHOMETAPARA_EL); for (Element ompLocant : ompLocants) { String locantText = ompLocant.getValue(); String firstChar = locantText.substring(0, 1); - Element afterOmpLocant = (Element)XOMTools.getNextSibling(ompLocant); - ompLocant.setLocalName(LOCANT_EL); - ompLocant.removeChildren(); + ompLocant.setName(LOCANT_EL); ompLocant.addAttribute(new Attribute(TYPE_ATR, ORTHOMETAPARA_TYPE_VAL)); - if(afterOmpLocant.getLocalName().equals(MULTIPLIER_EL) && afterOmpLocant.getAttributeValue(VALUE_ATR).equals("2") || - (afterOmpLocant.getAttribute(OUTIDS_ATR)!=null && MATCH_COMMA.split(afterOmpLocant.getAttributeValue(OUTIDS_ATR)).length>1) ) { + if (orthoMetaParaLocantIsTwoLocants(ompLocant)) { if ("o".equalsIgnoreCase(firstChar)){ - ompLocant.appendChild("1,ortho"); + ompLocant.setValue("1,ortho"); } else if 
("m".equalsIgnoreCase(firstChar)){ - ompLocant.appendChild("1,meta"); + ompLocant.setValue("1,meta"); } else if ("p".equalsIgnoreCase(firstChar)){ - ompLocant.appendChild("1,para"); + ompLocant.setValue("1,para"); } else{ throw new ComponentGenerationException(locantText + " was not identified as being either ortho, meta or para but according to the chemical grammar it should of been"); @@ -422,13 +423,13 @@ } else{ if ("o".equalsIgnoreCase(firstChar)){ - ompLocant.appendChild("ortho"); + ompLocant.setValue("ortho"); } else if ("m".equalsIgnoreCase(firstChar)){ - ompLocant.appendChild("meta"); + ompLocant.setValue("meta"); } else if ("p".equalsIgnoreCase(firstChar)){ - ompLocant.appendChild("para"); + ompLocant.setValue("para"); } else{ throw new ComponentGenerationException(locantText + " was not identified as being either ortho, meta or para but according to the chemical grammar it should of been"); @@ -437,6 +438,33 @@ } } + private boolean orthoMetaParaLocantIsTwoLocants(Element ompLocant) { + Element afterOmpLocant = OpsinTools.getNextSibling(ompLocant); + if (afterOmpLocant != null){ + String elName = afterOmpLocant.getName(); + if(elName.equals(MULTIPLIER_EL) && afterOmpLocant.getAttributeValue(VALUE_ATR).equals("2")){ + //e.g. p-dimethyl + return true; + } + String outIds = afterOmpLocant.getAttributeValue(OUTIDS_ATR); + if (outIds != null && outIds.split(",").length > 1) { + //e.g. p-phenylene + return true; + } + if(elName.equals(GROUP_EL)){ + Element multiplier = OpsinTools.getNextSibling(afterOmpLocant); + if(multiplier != null && multiplier.getName().equals(MULTIPLIER_EL) && multiplier.getAttributeValue(VALUE_ATR).equals("2")){ + Element suffix = OpsinTools.getNextSiblingIgnoringCertainElements(multiplier, new String[]{INFIX_EL, SUFFIXPREFIX_EL}); + if(suffix.getName().equals(SUFFIX_EL)){ + //e.g. 
o-benzenediamine + return true; + } + } + } + } + return false; + } + /** * Processes adjacent alkane stem component elements into a single alkaneStem group element with the appropriate SMILES * e.g. dodecane would be "do" value=2 and "dec" value=10 -->alkaneStem with 12 carbons @@ -444,34 +472,26 @@ * @param subOrRoot */ private void formAlkaneStemsFromComponents(Element subOrRoot) { - LinkedList alkaneStemComponents =new LinkedList(XOMTools.getChildElementsWithTagName(subOrRoot, ALKANESTEMCOMPONENT)); + Deque alkaneStemComponents =new ArrayDeque(subOrRoot.getChildElements(ALKANESTEMCOMPONENT)); while(!alkaneStemComponents.isEmpty()){ Element alkaneStemComponent = alkaneStemComponents.removeFirst(); int alkaneChainLength =0; StringBuilder alkaneName = new StringBuilder(); alkaneChainLength += Integer.parseInt(alkaneStemComponent.getAttributeValue(VALUE_ATR)); alkaneName.append(alkaneStemComponent.getValue()); - while (!alkaneStemComponents.isEmpty() && XOMTools.getNextSibling(alkaneStemComponent)==alkaneStemComponents.get(0)) { + while (!alkaneStemComponents.isEmpty() && OpsinTools.getNextSibling(alkaneStemComponent)==alkaneStemComponents.getFirst()) { alkaneStemComponent.detach(); alkaneStemComponent = alkaneStemComponents.removeFirst(); alkaneChainLength += Integer.parseInt(alkaneStemComponent.getAttributeValue(VALUE_ATR)); alkaneName.append(alkaneStemComponent.getValue()); } - Element alkaneStem = new Element(GROUP_EL); - alkaneStem.appendChild(alkaneName.toString()); + Element alkaneStem = new TokenEl(GROUP_EL, alkaneName.toString()); alkaneStem.addAttribute(new Attribute(TYPE_ATR, CHAIN_TYPE_VAL)); alkaneStem.addAttribute(new Attribute(SUBTYPE_ATR, ALKANESTEM_SUBTYPE_VAL)); - alkaneStem.addAttribute(new Attribute(VALTYPE_ATR, SMILES_VALTYPE_VAL)); alkaneStem.addAttribute(new Attribute(VALUE_ATR, StringTools.multiplyString("C", alkaneChainLength))); alkaneStem.addAttribute(new Attribute(USABLEASJOINER_ATR, "yes")); - StringBuilder labels = new StringBuilder(); 
- for (int i=1; i alkaneStemModifiers = subOrRoot.getChildElements(ALKANESTEMMODIFIER_EL); + for (Element alkaneStemModifier : alkaneStemModifiers) { + Element alkane = OpsinTools.getNextSibling(alkaneStemModifier); if (alkane ==null || !CHAIN_TYPE_VAL.equals(alkane.getAttributeValue(TYPE_ATR)) || !ALKANESTEM_SUBTYPE_VAL.equals(alkane.getAttributeValue(SUBTYPE_ATR))){ throw new ComponentGenerationException("OPSIN Bug: AlkaneStem not found after alkaneStemModifier"); @@ -517,9 +536,8 @@ //normal behaviour is default so don't need to do anything //n-methyl and n-ethyl contain redundant information and are probably intended to mean N-methyl/N-ethyl if ((chainLength==1 || chainLength ==2) && alkaneStemModifier.getValue().equals("n-")){ - Element locant = new Element(LOCANT_EL); - locant.appendChild("N"); - XOMTools.insertBefore(alkane, locant); + Element locant = new TokenEl(LOCANT_EL, "N"); + OpsinTools.insertBefore(alkane, locant); } continue; } @@ -545,14 +563,19 @@ if (chainLength==3 && !suffixPresent){ throw new ComponentGenerationException("iso has no meaning without a suffix on an alkane chain of length 3"); } - smiles =StringTools.multiplyString("C", chainLength-3) +"C(C)C"; - StringBuilder sb = new StringBuilder(); - for (int c = 1; c <= chainLength - 2; c++) { - sb.append(c); + if (chainLength ==8 && !suffixPresent){ + smiles = "C(C)(C)CC(C)(C)C"; + } + else{ + smiles =StringTools.multiplyString("C", chainLength - 3) +"C(C)C"; + StringBuilder sb = new StringBuilder(); + for (int c = 1; c <= chainLength - 2; c++) { + sb.append(c); + sb.append('/'); + } sb.append('/'); + labels = sb.toString(); } - sb.append('/'); - labels = sb.toString(); } else if (type.equals("sec")){ if (chainLength <3){ @@ -586,25 +609,25 @@ * @throws ComponentGenerationException */ private void processHeterogenousHydrides(Element subOrRoot) throws ComponentGenerationException { - List multipliers = XOMTools.getChildElementsWithTagName(subOrRoot, MULTIPLIER_EL); + List multipliers = 
subOrRoot.getChildElements(MULTIPLIER_EL); for (int i = 0; i < multipliers.size(); i++) { Element m = multipliers.get(i); if (m.getAttributeValue(TYPE_ATR).equals(GROUP_TYPE_VAL)){ continue; } - Element multipliedElem = (Element)XOMTools.getNextSibling(m); + Element multipliedElem = OpsinTools.getNextSibling(m); - if(multipliedElem.getLocalName().equals(GROUP_EL) && + if(multipliedElem.getName().equals(GROUP_EL) && multipliedElem.getAttribute(SUBTYPE_ATR)!=null && multipliedElem.getAttributeValue(SUBTYPE_ATR).equals(HETEROSTEM_SUBTYPE_VAL)) { int mvalue = Integer.parseInt(m.getAttributeValue(VALUE_ATR)); - Element possiblyALocant = (Element)XOMTools.getPreviousSibling(m);//detect rare case where multiplier does not mean form a chain of heteroatoms e.g. something like 1,2-disulfanylpropane - if(possiblyALocant !=null && possiblyALocant.getLocalName().equals(LOCANT_EL)&& mvalue==MATCH_COMMA.split(possiblyALocant.getValue()).length){ - Element suffix =(Element) XOMTools.getNextSibling(multipliedElem, SUFFIX_EL); + Element possiblyALocant = OpsinTools.getPreviousSibling(m);//detect rare case where multiplier does not mean form a chain of heteroatoms e.g. 
something like 1,2-disulfanylpropane + if(possiblyALocant !=null && possiblyALocant.getName().equals(LOCANT_EL)&& mvalue==possiblyALocant.getValue().split(",").length){ + Element suffix = OpsinTools.getNextSibling(multipliedElem, SUFFIX_EL); if (suffix !=null && suffix.getAttributeValue(TYPE_ATR).equals(INLINE_TYPE_VAL)){ - Element possibleMultiplier = (Element) XOMTools.getPreviousSibling(suffix); - if (!possibleMultiplier.getLocalName().equals(MULTIPLIER_EL)){//NOT something like 3,3'-diselane-1,2-diyl + Element possibleMultiplier = OpsinTools.getPreviousSibling(suffix); + if (!possibleMultiplier.getName().equals(MULTIPLIER_EL)){//NOT something like 3,3'-diselane-1,2-diyl continue; } } @@ -612,9 +635,9 @@ //chain of heteroatoms String heteroatomSmiles=multipliedElem.getAttributeValue(VALUE_ATR); - if (heteroatomSmiles.equals("B") && XOMTools.getPreviousSibling(m)==null){ - Element possibleUnsaturator = (Element) XOMTools.getNextSibling(multipliedElem); - if (possibleUnsaturator !=null && possibleUnsaturator.getLocalName().equals(UNSATURATOR_EL) && possibleUnsaturator.getAttributeValue(VALUE_ATR).equals("1")){ + if (heteroatomSmiles.equals("B") && OpsinTools.getPreviousSibling(m)==null){ + Element possibleUnsaturator = OpsinTools.getNextSibling(multipliedElem); + if (possibleUnsaturator !=null && possibleUnsaturator.getName().equals(UNSATURATOR_EL) && possibleUnsaturator.getAttributeValue(VALUE_ATR).equals("1")){ throw new ComponentGenerationException("Polyboranes are not currently supported"); } } @@ -628,21 +651,24 @@ if (m.getAttributeValue(TYPE_ATR).equals(GROUP_TYPE_VAL)){ continue; } - Element multipliedElem = (Element)XOMTools.getNextSibling(m); - if(multipliedElem.getLocalName().equals(HETEROATOM_EL)){ - Element possiblyAnotherHeteroAtom = (Element)XOMTools.getNextSibling(multipliedElem); - if (possiblyAnotherHeteroAtom !=null && possiblyAnotherHeteroAtom.getLocalName().equals(HETEROATOM_EL)){ - Element possiblyAnUnsaturator = 
XOMTools.getNextSiblingIgnoringCertainElements(possiblyAnotherHeteroAtom, new String[]{LOCANT_EL, MULTIPLIER_EL});//typically ane but can be ene or yne e.g. triphosphaza-1,3-diene - if (possiblyAnUnsaturator !=null && possiblyAnUnsaturator.getLocalName().equals(UNSATURATOR_EL)){ + Element multipliedElem = OpsinTools.getNextSibling(m); + if(multipliedElem.getName().equals(HETEROATOM_EL)){ + Element possiblyAnotherHeteroAtom = OpsinTools.getNextSibling(multipliedElem); + if (possiblyAnotherHeteroAtom !=null && possiblyAnotherHeteroAtom.getName().equals(HETEROATOM_EL)){ + Element possiblyAnUnsaturator = OpsinTools.getNextSiblingIgnoringCertainElements(possiblyAnotherHeteroAtom, new String[]{LOCANT_EL, MULTIPLIER_EL});//typically ane but can be ene or yne e.g. triphosphaza-1,3-diene + if (possiblyAnUnsaturator !=null && possiblyAnUnsaturator.getName().equals(UNSATURATOR_EL)){ + StringBuilder newGroupName = new StringBuilder(m.getValue()); + newGroupName.append(multipliedElem.getValue()); + newGroupName.append(possiblyAnotherHeteroAtom.getValue()); //chain of alternating heteroatoms if (possiblyAnUnsaturator.getAttributeValue(VALUE_ATR).equals("1")){ checkForAmbiguityWithHWring(multipliedElem.getAttributeValue(VALUE_ATR), possiblyAnotherHeteroAtom.getAttributeValue(VALUE_ATR)); } int mvalue = Integer.parseInt(m.getAttributeValue(VALUE_ATR)); StringBuilder smilesSB= new StringBuilder(); - Element possiblyARingFormingEl = (Element)XOMTools.getPreviousSibling(m); - boolean heteroatomChainWillFormARing =false; - if (possiblyARingFormingEl!=null && (possiblyARingFormingEl.getLocalName().equals(CYCLO_EL) || possiblyARingFormingEl.getLocalName().equals(VONBAEYER_EL) || possiblyARingFormingEl.getLocalName().equals(SPIRO_EL))){ + Element possiblyARingFormingEl = OpsinTools.getPreviousSibling(m); + boolean heteroatomChainWillFormARing = false; + if (possiblyARingFormingEl!=null && (possiblyARingFormingEl.getName().equals(CYCLO_EL) || 
possiblyARingFormingEl.getName().equals(VONBAEYER_EL) || possiblyARingFormingEl.getName().equals(SPIRO_EL))){ heteroatomChainWillFormARing=true; //will be cyclised later. for (int j = 0; j < mvalue; j++) { @@ -661,16 +687,15 @@ smiles = matchHdigit.matcher(smiles).replaceAll("H?");//hydrogen count will be determined by standard valency multipliedElem.detach(); - Element addedGroup=new Element(GROUP_EL); + Element addedGroup = new TokenEl(GROUP_EL, newGroupName.toString()); addedGroup.addAttribute(new Attribute(VALUE_ATR, smiles)); - addedGroup.addAttribute(new Attribute(VALTYPE_ATR, SMILES_VALTYPE_VAL)); + addedGroup.addAttribute(new Attribute(LABELS_ATR, NUMERIC_LABELS_VAL)); addedGroup.addAttribute(new Attribute(TYPE_ATR, CHAIN_TYPE_VAL)); addedGroup.addAttribute(new Attribute(SUBTYPE_ATR, HETEROSTEM_SUBTYPE_VAL)); if (!heteroatomChainWillFormARing){ addedGroup.addAttribute(new Attribute(USABLEASJOINER_ATR, "yes")); } - addedGroup.appendChild(smiles); - XOMTools.insertAfter(possiblyAnotherHeteroAtom, addedGroup); + OpsinTools.insertAfter(possiblyAnotherHeteroAtom, addedGroup); possiblyAnotherHeteroAtom.detach(); m.detach(); @@ -678,10 +703,10 @@ else if (possiblyAnUnsaturator!=null && possiblyAnUnsaturator.getValue().equals("an") && HANTZSCHWIDMAN_SUBTYPE_VAL.equals(possiblyAnUnsaturator.getAttributeValue(SUBTYPE_ATR))){ //check for HWring that should be interpreted as a heterogenous hydride boolean foundLocantIndicatingHwRingHeteroatomPositions =false;//allow formally incorrect HW ring systems if they have locants - Element possibleLocant = (Element) XOMTools.getPreviousSibling(m); - if (possibleLocant !=null && possibleLocant.getLocalName().equals(LOCANT_EL)){ + Element possibleLocant = OpsinTools.getPreviousSibling(m); + if (possibleLocant !=null && possibleLocant.getName().equals(LOCANT_EL)){ int expected = Integer.parseInt(m.getAttributeValue(VALUE_ATR)) + 1; - if (expected == MATCH_COMMA.split(possibleLocant.getValue()).length){ + if (expected == 
possibleLocant.getValue().split(",").length){ foundLocantIndicatingHwRingHeteroatomPositions = true; } } @@ -707,17 +732,17 @@ if (!m.find()){ throw new ComponentGenerationException("Failed to extract element from heteroatom"); } - String atom1Element = m.group(); + ChemEl atom1ChemEl = ChemEl.valueOf(m.group()); m = MATCH_ELEMENT_SYMBOL.matcher(secondHeteroAtomSMILES); if (!m.find()){ throw new ComponentGenerationException("Failed to extract element from heteroatom"); } - String atom2Element = m.group(); - if (AtomProperties.elementToHwPriority.get(atom1Element) > AtomProperties.elementToHwPriority.get(atom2Element)){ - if (atom2Element.equals("O") || atom2Element.equals("S") || atom2Element.equals("Se") || atom2Element.equals("Te") - || atom2Element.equals("Bi") || atom2Element.equals("Hg")){ - if (!hasSiorGeorSnorPb(atom1Element, atom2Element)){ + ChemEl atom2ChemEl = ChemEl.valueOf(m.group()); + if (AtomProperties.getHwpriority(atom1ChemEl) > AtomProperties.getHwpriority(atom2ChemEl)){ + if (atom2ChemEl == ChemEl.O || atom2ChemEl == ChemEl.S || atom2ChemEl == ChemEl.Se || atom2ChemEl == ChemEl.Te + || atom2ChemEl == ChemEl.Bi || atom2ChemEl == ChemEl.Hg){ + if (!hasSiorGeorSnorPb(atom1ChemEl, atom2ChemEl)){ throw new ComponentGenerationException("Hantzch-widman ring misparsed as a heterogeneous hydride with alternating atoms"); } } @@ -726,13 +751,13 @@ /** * Are either of the elements Si/Ge/Sn/Pb - * @param atom1Element - * @param atom2Element + * @param atom1ChemEl + * @param atom2ChemEl * @return */ - private boolean hasSiorGeorSnorPb(String atom1Element, String atom2Element) { - return (atom1Element.equals("Si") || atom1Element.equals("Ge") || atom1Element.equals("Sn") ||atom1Element.equals("Pb") - || atom2Element.equals("Si") || atom2Element.equals("Ge") || atom2Element.equals("Sn") ||atom2Element.equals("Pb")); + private boolean hasSiorGeorSnorPb(ChemEl atom1ChemEl, ChemEl atom2ChemEl) { + return (atom1ChemEl == ChemEl.Si || atom1ChemEl == ChemEl.Ge || 
atom1ChemEl == ChemEl.Sn || atom1ChemEl == ChemEl.Pb + || atom2ChemEl == ChemEl.Si || atom2ChemEl == ChemEl.Ge || atom2ChemEl == ChemEl.Sn || atom2ChemEl == ChemEl.Pb); } /** @@ -754,7 +779,7 @@ throw new ComponentGenerationException("Failed to extract element from heteroatom"); } String atom2Element = m.group(); - if (AtomProperties.elementToHwPriority.get(atom2Element) > AtomProperties.elementToHwPriority.get(atom1Element)){ + if (AtomProperties.getHwpriority(ChemEl.valueOf(atom2Element)) > AtomProperties.getHwpriority(ChemEl.valueOf(atom1Element))){ throw new ComponentGenerationException("heterogeneous hydride with alternating atoms misparsed as a Hantzch-widman ring"); } } @@ -765,18 +790,18 @@ * @throws ComponentGenerationException */ private void processIndicatedHydrogens(Element subOrRoot) throws ComponentGenerationException { - List indicatedHydrogens = XOMTools.getChildElementsWithTagName(subOrRoot, INDICATEDHYDROGEN_EL); + List indicatedHydrogens = subOrRoot.getChildElements(INDICATEDHYDROGEN_EL); for (Element indicatedHydrogenGroup : indicatedHydrogens) { String txt = StringTools.removeDashIfPresent(indicatedHydrogenGroup.getValue()); if (!StringTools.endsWithCaseInsensitive(txt, "h")){//remove brackets if they are present txt = txt.substring(1, txt.length()-1); } - String[] hydrogenLocants =MATCH_COMMA.split(txt); + String[] hydrogenLocants =txt.split(","); for (String hydrogenLocant : hydrogenLocants) { if (StringTools.endsWithCaseInsensitive(hydrogenLocant, "h")) { - Element indicatedHydrogenEl = new Element(INDICATEDHYDROGEN_EL); + Element indicatedHydrogenEl = new TokenEl(INDICATEDHYDROGEN_EL); indicatedHydrogenEl.addAttribute(new Attribute(LOCANT_ATR, hydrogenLocant.substring(0, hydrogenLocant.length() - 1))); - XOMTools.insertBefore(indicatedHydrogenGroup, indicatedHydrogenEl); + OpsinTools.insertBefore(indicatedHydrogenGroup, indicatedHydrogenEl); } else{ throw new ComponentGenerationException("OPSIN Bug: malformed indicated hydrogen element!"); 
@@ -792,8 +817,9 @@ * @param subOrRoot The substituent/root to looks for stereoChemistry in. * @throws ComponentGenerationException */ - static void processStereochemistry(Element subOrRoot) throws ComponentGenerationException { - List stereoChemistryElements = XOMTools.getChildElementsWithTagName(subOrRoot, STEREOCHEMISTRY_EL); + void processStereochemistry(Element subOrRoot) throws ComponentGenerationException { + List stereoChemistryElements = subOrRoot.getChildElements(STEREOCHEMISTRY_EL); + List locantedUnbrackettedEzTerms = new ArrayList(); for (Element stereoChemistryElement : stereoChemistryElements) { if (stereoChemistryElement.getAttributeValue(TYPE_ATR).equals(STEREOCHEMISTRYBRACKET_TYPE_VAL)){ processStereochemistryBracket(stereoChemistryElement); @@ -802,41 +828,76 @@ assignLocantUsingPreviousElementIfPresent(stereoChemistryElement);//assign a locant if one is directly before the cis/trans } else if (stereoChemistryElement.getAttributeValue(TYPE_ATR).equals(E_OR_Z_TYPE_VAL)){ - stereoChemistryElement.addAttribute(new Attribute(VALUE_ATR, stereoChemistryElement.getValue().toUpperCase())); - assignLocantUsingPreviousElementIfPresent(stereoChemistryElement);//assign a locant if one is directly before the E/Z + stereoChemistryElement.addAttribute(new Attribute(VALUE_ATR, stereoChemistryElement.getValue().toUpperCase(Locale.ROOT))); + if (assignLocantUsingPreviousElementIfPresent(stereoChemistryElement)) {//assign a locant if one is directly before the E/Z + locantedUnbrackettedEzTerms.add(stereoChemistryElement); + } + } + else if (stereoChemistryElement.getAttributeValue(TYPE_ATR).equals(ENDO_EXO_SYN_ANTI_TYPE_VAL)){ + processLocantAssigningForEndoExoSynAnti(stereoChemistryElement);//assign a locant if one is directly before the endo/exo/syn/anti. 
Don't neccesarily detach it } else if (stereoChemistryElement.getAttributeValue(TYPE_ATR).equals(ALPHA_OR_BETA_TYPE_VAL)){ processUnbracketedAlphaBetaStereochemistry(stereoChemistryElement); } + else if (stereoChemistryElement.getAttributeValue(TYPE_ATR).equals(RELATIVECISTRANS_TYPE_VAL)){ + processRelativeCisTrans(stereoChemistryElement); + } + } + if (locantedUnbrackettedEzTerms.size() > 0) { + duplicateLocantFromStereoTermIfAdjacentToEneOrYlidene(locantedUnbrackettedEzTerms); } } - private static void processStereochemistryBracket(Element stereoChemistryElement) throws ComponentGenerationException { - String txt = StringTools.removeDashIfPresent(stereoChemistryElement.getValue()); - if (txt.startsWith("rel-")){ + private void processStereochemistryBracket(Element stereoChemistryElement) throws ComponentGenerationException { + String txt = stereoChemistryElement.getValue(); + if (StringTools.startsWithCaseInsensitive(txt, "rel-")){ txt = txt.substring(4); } - Matcher starMatcher = matchStar.matcher(txt); - txt = starMatcher.replaceAll(""); - if (!txt.startsWith("rac-")){ - txt =txt.substring(1, txt.length()-1);//remove opening and closing bracket. 
- String[] stereoChemistryDescriptors = matchCommaOrDash.split(txt); - for (String stereoChemistryDescriptor : stereoChemistryDescriptors) { - Matcher m = matchStereochemistry.matcher(stereoChemistryDescriptor); - if (m.matches()){ - if (!m.group(2).equals("RS") && !m.group(2).equals("SR")){ - Element stereoChemEl = new Element(STEREOCHEMISTRY_EL); - if (m.group(1).length()!=0){ - stereoChemEl.addAttribute(new Attribute(LOCANT_ATR, m.group(1))); + txt = StringTools.removeDashIfPresent(txt); + txt = matchStar.matcher(txt).replaceAll(""); + boolean racemicStereo; + Matcher racemicMacher = matchRacemic.matcher(txt); + if (racemicMacher.lookingAt()) { + txt = txt.substring(racemicMacher.group().length()); + racemicStereo = true; + } + else { + racemicStereo = false; + } + if (txt.length() > 0) {//if txt is just "rel- or rac-" then it will be length 0 at this point + List stereoChemistryDescriptors = splitStereoBracketIntoDescriptors(txt); + boolean exclusiveStereoTerm = false; + if (stereoChemistryDescriptors.size() == 1){ + String stereoChemistryDescriptor = stereoChemistryDescriptors.get(0); + if (stereoChemistryDescriptor.equalsIgnoreCase("rel")){ + exclusiveStereoTerm = true; + } + if (matchRacemic.matcher(stereoChemistryDescriptor).matches()) { + racemicStereo = true; + exclusiveStereoTerm = true; + } + } + if (!exclusiveStereoTerm) { + for (String stereoChemistryDescriptor : stereoChemistryDescriptors) { + Matcher m = matchStereochemistry.matcher(stereoChemistryDescriptor); + if (m.matches()){ + Element stereoChemEl = new TokenEl(STEREOCHEMISTRY_EL, stereoChemistryDescriptor); + String locantVal = m.group(1); + if (locantVal.length() > 0){ + stereoChemEl.addAttribute(new Attribute(LOCANT_ATR, StringTools.removeDashIfPresent(locantVal))); } - stereoChemEl.appendChild(stereoChemistryDescriptor); - XOMTools.insertBefore(stereoChemistryElement, stereoChemEl); + OpsinTools.insertBefore(stereoChemistryElement, stereoChemEl); if (matchRS.matcher(m.group(2)).matches()) { 
stereoChemEl.addAttribute(new Attribute(TYPE_ATR, R_OR_S_TYPE_VAL)); - stereoChemEl.addAttribute(new Attribute(VALUE_ATR, m.group(2).toUpperCase())); + String symbol = m.group(2).toUpperCase(Locale.ROOT).replaceAll("/", ""); + if (racemicStereo && symbol.length() == 1){ + symbol = (symbol.equals("R")) ? "RS" : "SR"; + } + stereoChemEl.addAttribute(new Attribute(VALUE_ATR, symbol)); } else if (matchEZ.matcher(m.group(2)).matches()) { stereoChemEl.addAttribute(new Attribute(TYPE_ATR, E_OR_Z_TYPE_VAL)); - stereoChemEl.addAttribute(new Attribute(VALUE_ATR, m.group(2).toUpperCase())); + String symbol = m.group(2).toUpperCase(Locale.ROOT); + stereoChemEl.addAttribute(new Attribute(VALUE_ATR,symbol)); } else if (matchAlphaBetaStereochem.matcher(m.group(2)).matches()){ stereoChemEl.addAttribute(new Attribute(TYPE_ATR, ALPHA_OR_BETA_TYPE_VAL)); if (Character.toLowerCase(m.group(2).charAt(0)) == 'a'){ @@ -853,31 +914,82 @@ } } else if (matchCisTrans.matcher(m.group(2)).matches()) { stereoChemEl.addAttribute(new Attribute(TYPE_ATR, CISORTRANS_TYPE_VAL)); - stereoChemEl.addAttribute(new Attribute(VALUE_ATR, m.group(2).toLowerCase())); + stereoChemEl.addAttribute(new Attribute(VALUE_ATR, m.group(2).toLowerCase(Locale.ROOT))); + } else if (matchEndoExoSynAnti.matcher(m.group(2)).matches()) { + stereoChemEl.addAttribute(new Attribute(TYPE_ATR, ENDO_EXO_SYN_ANTI_TYPE_VAL)); + stereoChemEl.addAttribute(new Attribute(VALUE_ATR, m.group(2).toLowerCase(Locale.ROOT))); + } else if (matchAxialStereo.matcher(m.group(2)).matches()) { + stereoChemEl.addAttribute(new Attribute(TYPE_ATR, AXIAL_TYPE_VAL)); + stereoChemEl.addAttribute(new Attribute(VALUE_ATR, m.group(2))); } else { throw new ComponentGenerationException("Malformed stereochemistry element: " + stereoChemistryElement.getValue()); } - - } - } else { - throw new ComponentGenerationException("Malformed stereochemistry element: " + stereoChemistryElement.getValue()); - } - } + } else { + throw new 
ComponentGenerationException("Malformed stereochemistry element: " + stereoChemistryElement.getValue()); + } + } + } } stereoChemistryElement.detach(); } - private static void assignLocantUsingPreviousElementIfPresent(Element stereoChemistryElement) { - Element possibleLocant = (Element) XOMTools.getPrevious(stereoChemistryElement); - if (possibleLocant !=null && possibleLocant.getLocalName().equals(LOCANT_EL) && MATCH_COMMA.split(possibleLocant.getValue()).length==1){ + private List splitStereoBracketIntoDescriptors(String stereoBracket) { + List stereoDescriptors = new ArrayList(); + StringBuilder sb = new StringBuilder(); + //ignore first and last character (opening and closing bracket) + for (int i = 1, l = stereoBracket.length() - 1; i < l; i++) { + char ch = stereoBracket.charAt(i); + if (ch ==','){ + stereoDescriptors.add(sb.toString()); + sb.setLength(0); + } + else if (ch == '-'){ + if (matchStereochemistry.matcher(sb.toString()).matches()){ + //delimiter between stereochemistry + stereoDescriptors.add(sb.toString()); + sb.setLength(0); + } + else{ + //locanted stereochemistry term + sb.append(ch); + } + } + else{ + sb.append(ch); + } + } + stereoDescriptors.add(sb.toString()); + return stereoDescriptors; + } + + private boolean assignLocantUsingPreviousElementIfPresent(Element stereoChemistryElement) { + Element possibleLocant = OpsinTools.getPrevious(stereoChemistryElement); + if (possibleLocant !=null && possibleLocant.getName().equals(LOCANT_EL) && possibleLocant.getValue().split(",").length==1){ stereoChemistryElement.addAttribute(new Attribute(LOCANT_ATR, possibleLocant.getValue())); possibleLocant.detach(); + return true; + } + return false; + } + + private void processLocantAssigningForEndoExoSynAnti(Element stereoChemistryElement) { + Element possibleLocant = OpsinTools.getPrevious(stereoChemistryElement); + if (possibleLocant !=null && possibleLocant.getName().equals(LOCANT_EL) && possibleLocant.getValue().split(",").length==1){ + 
stereoChemistryElement.addAttribute(new Attribute(LOCANT_ATR, possibleLocant.getValue())); + Element group = OpsinTools.getNextSibling(stereoChemistryElement, GROUP_EL); + if (group != null && + (CYCLICUNSATURABLEHYDROCARBON_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) + || OpsinTools.getPreviousSibling(group).getName().equals(VONBAEYER_EL))){ + //detach locant only if we're sure it has no other meaning + //typically locants in front of endo/exo/syn/anti also indicate the position of a susbtituent/suffix e.g. 3-exo-amino + possibleLocant.detach(); + } } } - private static void processUnbracketedAlphaBetaStereochemistry(Element stereoChemistryElement) throws ComponentGenerationException { + private void processUnbracketedAlphaBetaStereochemistry(Element stereoChemistryElement) throws ComponentGenerationException { String txt = StringTools.removeDashIfPresent(stereoChemistryElement.getValue()); - String[] stereoChemistryDescriptors = MATCH_COMMA.split(txt); + String[] stereoChemistryDescriptors = txt.split(","); List locants = new ArrayList(); boolean createLocantsEl =false; for (String stereoChemistryDescriptor : stereoChemistryDescriptors) { @@ -888,10 +1000,9 @@ locants.add(locant); Matcher alphaBetaMatcher = matchAlphaBetaStereochem.matcher(possibleAlphaBeta); if (alphaBetaMatcher.matches()){ - Element stereoChemEl = new Element(STEREOCHEMISTRY_EL); + Element stereoChemEl = new TokenEl(STEREOCHEMISTRY_EL, stereoChemistryDescriptor); stereoChemEl.addAttribute(new Attribute(LOCANT_ATR, locant)); - stereoChemEl.appendChild(stereoChemistryDescriptor); - XOMTools.insertBefore(stereoChemistryElement, stereoChemEl); + OpsinTools.insertBefore(stereoChemistryElement, stereoChemEl); stereoChemEl.addAttribute(new Attribute(TYPE_ATR, ALPHA_OR_BETA_TYPE_VAL)); if (Character.toLowerCase(possibleAlphaBeta.charAt(0)) == 'a'){ stereoChemEl.addAttribute(new Attribute(VALUE_ATR, "alpha")); @@ -914,7 +1025,7 @@ if (!createLocantsEl){ //create locants unless a group 
supporting alpha/beta stereochem is within this substituent/root createLocantsEl =true; - List groups = XOMTools.getNextSiblingsOfType(stereoChemistryElement, GROUP_EL); + List groups = OpsinTools.getNextSiblingsOfType(stereoChemistryElement, GROUP_EL); for (Element group : groups) { if (group.getAttributeValue(ALPHABETACLOCKWISEATOMORDERING_ATR)!=null){ createLocantsEl=false; @@ -924,12 +1035,81 @@ } if (createLocantsEl){ - Element newLocantEl = new Element(LOCANT_EL); - newLocantEl.appendChild(StringTools.stringListToString(locants, ",")); - XOMTools.insertAfter(stereoChemistryElement, newLocantEl); + Element newLocantEl = new TokenEl(LOCANT_EL, StringTools.stringListToString(locants, ",")); + OpsinTools.insertAfter(stereoChemistryElement, newLocantEl); } stereoChemistryElement.detach(); } + + private void processRelativeCisTrans(Element stereoChemistryElement) { + String value = StringTools.removeDashIfPresent(stereoChemistryElement.getValue()); + StringBuilder sb = new StringBuilder(); + String[] terms = value.split(","); + for (String term : terms) { + if (term.startsWith("c-")|| term.startsWith("t-") || term.startsWith("r-")){ + if (sb.length() > 0){ + sb.append(','); + } + sb.append(term.substring(2)); + } + else{ + throw new RuntimeException("Malformed relativeCisTrans element"); + } + } + Element locantEl = new TokenEl(LOCANT_EL, sb.toString()); + OpsinTools.insertAfter(stereoChemistryElement, locantEl); + } + + /** + * If the e/z term is next to an ene or ylidene duplicate the locant + * e.g. 
2E,4Z-diene --> 2E,4Z-2,4-diene + * 2E-ylidene --> 2E-2-ylidene + * @param locantedUnbrackettedEzTerms + */ + private void duplicateLocantFromStereoTermIfAdjacentToEneOrYlidene(List locantedUnbrackettedEzTerms) { + for (int i = 0, l = locantedUnbrackettedEzTerms.size(); i < l; i++) { + Element currentTerm = locantedUnbrackettedEzTerms.get(i); + List groupedTerms = new ArrayList(); + groupedTerms.add(currentTerm); + while (i + 1 < l && locantedUnbrackettedEzTerms.get(i + 1).equals(OpsinTools.getNextSibling(currentTerm))) { + currentTerm = locantedUnbrackettedEzTerms.get(++i); + groupedTerms.add(currentTerm); + } + Element lastTermInGroup = groupedTerms.get(groupedTerms.size() - 1); + Element eneOrYlidene; + if (groupedTerms.size() > 1) { + Element multiplier = OpsinTools.getNextSibling(lastTermInGroup); + if (!(multiplier != null && multiplier.getName().equals(MULTIPLIER_EL) && String.valueOf(groupedTerms.size()).equals(multiplier.getAttributeValue(VALUE_ATR)))) { + continue; + } + eneOrYlidene = OpsinTools.getNextSibling(multiplier); + } + else { + eneOrYlidene = OpsinTools.getNextSibling(lastTermInGroup); + } + if (eneOrYlidene != null) { + String name = eneOrYlidene.getName(); + if (name.equals(UNSATURATOR_EL) || name.equals(SUFFIX_EL)) { + if ((name.equals(UNSATURATOR_EL) && eneOrYlidene.getAttributeValue(VALUE_ATR).equals("2")) + || (name.equals(SUFFIX_EL) && eneOrYlidene.getAttributeValue(VALUE_ATR).equals("ylidene"))) { + List locants = new ArrayList(); + for (Element stereochemistryTerm : groupedTerms) { + locants.add(stereochemistryTerm.getAttributeValue(LOCANT_ATR)); + } + Element newLocant = new TokenEl(LOCANT_EL, StringTools.stringListToString(locants, ",")); + OpsinTools.insertAfter(lastTermInGroup, newLocant); + } else{ + if (name.equals(UNSATURATOR_EL)){ + throw new RuntimeException("After E/Z stereo expected ene but found: " + eneOrYlidene.getValue()); + } + else { + throw new RuntimeException("After E/Z stereo expected yldiene but found: " + 
eneOrYlidene.getValue()); + } + } + } + } + } + } /** * Looks for "suffixPrefix" and assigns their value them as an attribute of an adjacent suffix @@ -937,10 +1117,10 @@ * @throws ComponentGenerationException */ private void processSuffixPrefixes(Element subOrRoot) throws ComponentGenerationException { - List suffixPrefixes = XOMTools.getChildElementsWithTagName(subOrRoot, SUFFIXPREFIX_EL); + List suffixPrefixes = subOrRoot.getChildElements(SUFFIXPREFIX_EL); for (Element suffixPrefix : suffixPrefixes) { - Element suffix = (Element) XOMTools.getNextSibling(suffixPrefix); - if (suffix==null || ! suffix.getLocalName().equals(SUFFIX_EL)){ + Element suffix = OpsinTools.getNextSibling(suffixPrefix); + if (suffix==null || ! suffix.getName().equals(SUFFIX_EL)){ throw new ComponentGenerationException("OPSIN bug: suffix not found after suffixPrefix: " + suffixPrefix.getValue()); } suffix.addAttribute(new Attribute(SUFFIXPREFIX_ATR, suffixPrefix.getAttributeValue(VALUE_ATR))); @@ -958,10 +1138,10 @@ * @throws ComponentGenerationException */ private void processInfixes(Element subOrRoot) throws ComponentGenerationException { - List infixes = XOMTools.getChildElementsWithTagName(subOrRoot, INFIX_EL); + List infixes = subOrRoot.getChildElements(INFIX_EL); for (Element infix : infixes) { - Element suffix = XOMTools.getNextSiblingIgnoringCertainElements(infix, new String[]{INFIX_EL, SUFFIXPREFIX_EL, MULTIPLIER_EL}); - if (suffix ==null || !suffix.getLocalName().equals(SUFFIX_EL)){ + Element suffix = OpsinTools.getNextSiblingIgnoringCertainElements(infix, new String[]{INFIX_EL, SUFFIXPREFIX_EL, MULTIPLIER_EL}); + if (suffix ==null || !suffix.getName().equals(SUFFIX_EL)){ throw new ComponentGenerationException("No suffix found next next to infix: "+ infix.getValue()); } List currentInfixInformation; @@ -970,23 +1150,23 @@ currentInfixInformation = new ArrayList(); } else{ - currentInfixInformation = 
StringTools.arrayToList(MATCH_SEMICOLON.split(suffix.getAttributeValue(INFIX_ATR))); + currentInfixInformation = StringTools.arrayToList(suffix.getAttributeValue(INFIX_ATR).split(";")); } String infixValue =infix.getAttributeValue(VALUE_ATR); currentInfixInformation.add(infixValue); - Element possibleMultiplier = (Element) XOMTools.getPreviousSibling(infix); + Element possibleMultiplier = OpsinTools.getPreviousSibling(infix); Element possibleBracket; boolean multiplierKnownToIndicateInfixMultiplicationPresent =false; - if (possibleMultiplier.getLocalName().equals(MULTIPLIER_EL)){ + if (possibleMultiplier.getName().equals(MULTIPLIER_EL)){ //suffix prefix present so multiplier must indicate infix replacement - Element possibleSuffixPrefix = XOMTools.getPreviousSiblingIgnoringCertainElements(infix, new String[]{MULTIPLIER_EL, INFIX_EL}); - if (possibleSuffixPrefix!=null && possibleSuffixPrefix.getLocalName().equals(SUFFIXPREFIX_EL)){ + Element possibleSuffixPrefix = OpsinTools.getPreviousSiblingIgnoringCertainElements(infix, new String[]{MULTIPLIER_EL, INFIX_EL}); + if (possibleSuffixPrefix!=null && possibleSuffixPrefix.getName().equals(SUFFIXPREFIX_EL)){ multiplierKnownToIndicateInfixMultiplicationPresent =true; } - Element elementBeforeMultiplier = (Element) XOMTools.getPreviousSibling(possibleMultiplier); + Element elementBeforeMultiplier = OpsinTools.getPreviousSibling(possibleMultiplier); //double multiplier indicates multiple suffixes which all have their infix multiplied //if currentInfixInformation contains more than 1 entry it contains information from an infix from before the multiplier so the interpretation of the multiplier as a suffix multiplier is impossible - if (elementBeforeMultiplier.getLocalName().equals(MULTIPLIER_EL) || currentInfixInformation.size() > 1){ + if (elementBeforeMultiplier.getName().equals(MULTIPLIER_EL) || currentInfixInformation.size() > 1){ multiplierKnownToIndicateInfixMultiplicationPresent =true; } possibleBracket = 
elementBeforeMultiplier; @@ -996,9 +1176,9 @@ possibleMultiplier=null; infix.detach(); } - if (possibleBracket.getLocalName().equals(STRUCTURALOPENBRACKET_EL)){ - Element bracket = (Element) XOMTools.getNextSibling(suffix); - if (!bracket.getLocalName().equals(STRUCTURALCLOSEBRACKET_EL)){ + if (possibleBracket.getName().equals(STRUCTURALOPENBRACKET_EL)){ + Element bracket = OpsinTools.getNextSibling(suffix); + if (!bracket.getName().equals(STRUCTURALCLOSEBRACKET_EL)){ throw new ComponentGenerationException("Matching closing bracket not found around infix/suffix block"); } if (possibleMultiplier!=null){ @@ -1040,7 +1220,7 @@ * @throws ComponentGenerationException */ private void processLambdaConvention(Element subOrRoot) throws ComponentGenerationException { - List lambdaConventionEls = XOMTools.getChildElementsWithTagName(subOrRoot, LAMBDACONVENTION_EL); + List lambdaConventionEls = subOrRoot.getChildElements(LAMBDACONVENTION_EL); boolean fusedRingPresent = false; if (lambdaConventionEls.size()>0){ if (subOrRoot.getChildElements(GROUP_EL).size()>1){ @@ -1049,57 +1229,57 @@ } for (Element lambdaConventionEl : lambdaConventionEls) { boolean frontLocantsExpected =false;//Is the lambdaConvention el followed by benz/benzo of a fused ring system (these have front locants which correspond to the final fused rings numbering) or by a polycylicspiro system - String[] lambdaValues = MATCH_COMMA.split(StringTools.removeDashIfPresent(lambdaConventionEl.getValue())); - Element possibleHeteroatomOrMultiplier = (Element) XOMTools.getNextSibling(lambdaConventionEl); + String[] lambdaValues = StringTools.removeDashIfPresent(lambdaConventionEl.getValue()).split(","); + Element possibleHeteroatomOrMultiplier = OpsinTools.getNextSibling(lambdaConventionEl); int heteroCount = 0; int multiplierValue = 1; while(possibleHeteroatomOrMultiplier != null){ - if(possibleHeteroatomOrMultiplier.getLocalName().equals(HETEROATOM_EL)) { + 
if(possibleHeteroatomOrMultiplier.getName().equals(HETEROATOM_EL)) { heteroCount+=multiplierValue; multiplierValue =1; - } else if (possibleHeteroatomOrMultiplier.getLocalName().equals(MULTIPLIER_EL)){ + } else if (possibleHeteroatomOrMultiplier.getName().equals(MULTIPLIER_EL)){ multiplierValue = Integer.parseInt(possibleHeteroatomOrMultiplier.getAttributeValue(VALUE_ATR)); } else{ break; } - possibleHeteroatomOrMultiplier = (Element)XOMTools.getNextSibling(possibleHeteroatomOrMultiplier); + possibleHeteroatomOrMultiplier = OpsinTools.getNextSibling(possibleHeteroatomOrMultiplier); } boolean assignLambdasToHeteroAtoms =false; if (lambdaValues.length==heteroCount){//heteroatom and number of locants +lambdas must match - if (fusedRingPresent && possibleHeteroatomOrMultiplier!=null && possibleHeteroatomOrMultiplier.getLocalName().equals(GROUP_EL) && possibleHeteroatomOrMultiplier.getAttributeValue(SUBTYPE_ATR).equals(HANTZSCHWIDMAN_SUBTYPE_VAL)){ + if (fusedRingPresent && possibleHeteroatomOrMultiplier!=null && possibleHeteroatomOrMultiplier.getName().equals(GROUP_EL) && possibleHeteroatomOrMultiplier.getAttributeValue(SUBTYPE_ATR).equals(HANTZSCHWIDMAN_SUBTYPE_VAL)){ //You must not set the locants of a HW system which forms a component of a fused ring system. The locant specified corresponds to the complete fused ring system. 
} else{ assignLambdasToHeteroAtoms =true; } } - else if (possibleHeteroatomOrMultiplier!=null && ((heteroCount==0 && XOMTools.getNextSibling(lambdaConventionEl).equals(possibleHeteroatomOrMultiplier) && - fusedRingPresent && possibleHeteroatomOrMultiplier.getLocalName().equals(GROUP_EL) && + else if (possibleHeteroatomOrMultiplier!=null && ((heteroCount==0 && OpsinTools.getNextSibling(lambdaConventionEl).equals(possibleHeteroatomOrMultiplier) && + fusedRingPresent && possibleHeteroatomOrMultiplier.getName().equals(GROUP_EL) && (possibleHeteroatomOrMultiplier.getValue().equals("benzo") || possibleHeteroatomOrMultiplier.getValue().equals("benz")) - && !((Element)XOMTools.getNextSibling(possibleHeteroatomOrMultiplier)).getLocalName().equals(FUSION_EL) - && !((Element)XOMTools.getNextSibling(possibleHeteroatomOrMultiplier)).getLocalName().equals(LOCANT_EL)) - || (possibleHeteroatomOrMultiplier.getLocalName().equals(POLYCYCLICSPIRO_EL) && + && !OpsinTools.getNextSibling(possibleHeteroatomOrMultiplier).getName().equals(FUSION_EL) + && !OpsinTools.getNextSibling(possibleHeteroatomOrMultiplier).getName().equals(LOCANT_EL)) + || (possibleHeteroatomOrMultiplier.getName().equals(POLYCYCLICSPIRO_EL) && (possibleHeteroatomOrMultiplier.getAttributeValue(VALUE_ATR).equals("spirobi")|| possibleHeteroatomOrMultiplier.getAttributeValue(VALUE_ATR).equals("spiroter"))))){ frontLocantsExpected = true;//a benzo fused ring e.g. 1lambda4,3-benzothiazole or a symmetrical poly cyclic spiro system } List heteroAtoms = new ArrayList();//contains the heteroatoms to apply the lambda values too. 
Can be empty if the values are applied to a group directly rather than to a heteroatom if (assignLambdasToHeteroAtoms){//populate heteroAtoms, multiplied heteroatoms are multiplied out Element multiplier = null; - Element heteroatomOrMultiplier = (Element) XOMTools.getNextSibling(lambdaConventionEl); + Element heteroatomOrMultiplier = OpsinTools.getNextSibling(lambdaConventionEl); while(heteroatomOrMultiplier != null){ - if(heteroatomOrMultiplier.getLocalName().equals(HETEROATOM_EL)) { + if(heteroatomOrMultiplier.getName().equals(HETEROATOM_EL)) { heteroAtoms.add(heteroatomOrMultiplier); if (multiplier!=null){ for (int i = 1; i < Integer.parseInt(multiplier.getAttributeValue(VALUE_ATR)); i++) { - Element newHeteroAtom = new Element(heteroatomOrMultiplier); - XOMTools.insertBefore(heteroatomOrMultiplier, newHeteroAtom); + Element newHeteroAtom = heteroatomOrMultiplier.copy(); + OpsinTools.insertBefore(heteroatomOrMultiplier, newHeteroAtom); heteroAtoms.add(newHeteroAtom); } multiplier.detach(); multiplier=null; } - } else if (heteroatomOrMultiplier.getLocalName().equals(MULTIPLIER_EL)){ + } else if (heteroatomOrMultiplier.getName().equals(MULTIPLIER_EL)){ if (multiplier !=null){ break; } @@ -1110,7 +1290,7 @@ else{ break; } - heteroatomOrMultiplier = (Element)XOMTools.getNextSibling(heteroatomOrMultiplier); + heteroatomOrMultiplier = OpsinTools.getNextSibling(heteroatomOrMultiplier); } } @@ -1137,12 +1317,12 @@ } } else{ - Element newLambda = new Element(LAMBDACONVENTION_EL); + Element newLambda = new TokenEl(LAMBDACONVENTION_EL); newLambda.addAttribute(valencyChange); if (locantAtr!=null){ newLambda.addAttribute(locantAtr); } - XOMTools.insertBefore(lambdaConventionEl, newLambda); + OpsinTools.insertBefore(lambdaConventionEl, newLambda); } } else{//just a locant e.g 1,3lambda5 @@ -1161,8 +1341,8 @@ lambdaConventionEl.detach(); } else{ - lambdaConventionEl.setLocalName(LOCANT_EL); - XOMTools.setTextChild(lambdaConventionEl, StringTools.arrayToString(lambdaValues, 
",")); + lambdaConventionEl.setName(LOCANT_EL); + lambdaConventionEl.setValue(StringTools.arrayToString(lambdaValues, ",")); } } } @@ -1171,29 +1351,33 @@ * elements contained within in a big <bracket> element. * * @param substituentsAndRoot: The substituent/root elements at the current level of the tree - * @return Whether the method did something, and so needs to be called again. * @throws ComponentGenerationException */ - private boolean findAndStructureBrackets(List substituentsAndRoot) throws ComponentGenerationException { + private void findAndStructureBrackets(List substituentsAndRoot) throws ComponentGenerationException { int blevel = 0; Element openBracket = null; - Element closeBracket = null; + boolean nestedBrackets = false; for (Element sub : substituentsAndRoot) { - Elements children = sub.getChildElements(); - for(int i=0; i children = sub.getChildElements(); + for (Element child : children) { + String name = child.getName(); + if(name.equals(OPENBRACKET_EL)) { + blevel++; if(openBracket == null) { openBracket = child; } - blevel++; - } else if (child.getLocalName().equals(CLOSEBRACKET_EL)) { + else { + nestedBrackets = true; + } + } else if (name.equals(CLOSEBRACKET_EL)) { blevel--; if(blevel == 0) { - closeBracket = child; - Element bracket = structureBrackets(openBracket, closeBracket); - while(findAndStructureBrackets(XOMTools.getDescendantElementsWithTagName(bracket, SUBSTITUENT_EL))); - return true; + Element bracket = structureBrackets(openBracket, child); + if (nestedBrackets) { + findAndStructureBrackets(OpsinTools.getDescendantElementsWithTagNames(bracket, new String[]{SUBSTITUENT_EL, ROOT_EL})); + } + openBracket = null; + nestedBrackets = false; } } } @@ -1201,7 +1385,6 @@ if (blevel != 0){ throw new ComponentGenerationException("Brackets do not match!"); } - return false; } /**Places the elements in substituents containing/between an open and close bracket @@ -1213,40 +1396,40 @@ * @throws ComponentGenerationException */ private Element 
structureBrackets(Element openBracket, Element closeBracket) throws ComponentGenerationException { - Element bracket = new Element(BRACKET_EL); - XOMTools.insertBefore(openBracket.getParent(), bracket); + Element bracket = new GroupingEl(BRACKET_EL); + Element currentEl = openBracket.getParent(); + OpsinTools.insertBefore(currentEl, bracket); /* Pick up everything in the substituent before the bracket*/ - while(!openBracket.getParent().getChild(0).equals(openBracket)) { - Node n = openBracket.getParent().getChild(0); - n.detach(); - bracket.appendChild(n); + Element firstChild = currentEl.getChild(0); + while(!firstChild.equals(openBracket)) { + firstChild.detach(); + bracket.addChild(firstChild); + firstChild = currentEl.getChild(0); } - /* Pick up all nodes from the one with the open bracket, + /* Pick up all elements from the one with the open bracket, * to the one with the close bracket, inclusive. */ - Node currentNode = openBracket.getParent(); - while(!currentNode.equals(closeBracket.getParent())) { - Node nextNode = XOMTools.getNextSibling(currentNode); - currentNode.detach(); - bracket.appendChild(currentNode); - currentNode = nextNode; - if (currentNode==null){ + while(!currentEl.equals(closeBracket.getParent())) { + Element nextEl = OpsinTools.getNextSibling(currentEl); + currentEl.detach(); + bracket.addChild(currentEl); + currentEl = nextEl; + if (currentEl == null) { throw new ComponentGenerationException("Brackets within a word do not match!"); } } - currentNode.detach(); - bracket.appendChild(currentNode); - /* Pick up nodes after the close bracket */ - currentNode = XOMTools.getNextSibling(closeBracket); - while(currentNode != null) { - Node nextNode = XOMTools.getNextSibling(currentNode); - currentNode.detach(); - bracket.appendChild(currentNode); - currentNode = nextNode; + currentEl.detach(); + bracket.addChild(currentEl); + /* Pick up elements after the close bracket */ + currentEl = OpsinTools.getNextSibling(closeBracket); + while(currentEl != 
null) { + Element nextEl = OpsinTools.getNextSibling(currentEl); + currentEl.detach(); + bracket.addChild(currentEl); + currentEl = nextEl; } openBracket.detach(); closeBracket.detach(); - return bracket; } @@ -1255,7 +1438,7 @@ * @throws ComponentGenerationException */ private void processHydroCarbonRings(Element subOrRoot) throws ComponentGenerationException { - List annulens = XOMTools.getChildElementsWithTagName(subOrRoot, ANNULEN_EL); + List annulens = subOrRoot.getChildElements(ANNULEN_EL); for (Element annulen : annulens) { String annulenValue =annulen.getValue(); Matcher match = matchAnnulene.matcher(annulenValue); @@ -1273,22 +1456,20 @@ String SMILES = "c1" +StringTools.multiplyString("c", annulenSize -1); SMILES += "1"; - Element group =new Element(GROUP_EL); + Element group =new TokenEl(GROUP_EL, annulenValue); group.addAttribute(new Attribute(VALUE_ATR, SMILES)); - group.addAttribute(new Attribute(VALTYPE_ATR, SMILES_VALTYPE_VAL)); + group.addAttribute(new Attribute(LABELS_ATR, NUMERIC_LABELS_VAL)); group.addAttribute(new Attribute(TYPE_ATR, RING_TYPE_VAL)); - group.addAttribute(new Attribute(SUBTYPE_ATR, ARYLGROUP_SUBTYPE_VAL)); - group.appendChild(annulenValue); + group.addAttribute(new Attribute(SUBTYPE_ATR, RING_SUBTYPE_VAL)); annulen.getParent().replaceChild(annulen, group); } - List hydrocarbonFRSystems = XOMTools.getChildElementsWithTagName(subOrRoot, HYDROCARBONFUSEDRINGSYSTEM_EL); + List hydrocarbonFRSystems = subOrRoot.getChildElements(HYDROCARBONFUSEDRINGSYSTEM_EL); for (Element hydrocarbonFRSystem : hydrocarbonFRSystems) { - Element multiplier = (Element)XOMTools.getPreviousSibling(hydrocarbonFRSystem); - if(multiplier != null && multiplier.getLocalName().equals(MULTIPLIER_EL)) { + Element multiplier = OpsinTools.getPreviousSibling(hydrocarbonFRSystem); + if(multiplier != null && multiplier.getName().equals(MULTIPLIER_EL)) { int multiplierValue =Integer.parseInt(multiplier.getAttributeValue(VALUE_ATR)); String classOfHydrocarbonFRSystem 
=hydrocarbonFRSystem.getAttributeValue(VALUE_ATR); - Element newGroup =new Element(GROUP_EL); StringBuilder smilesSB= new StringBuilder(); if (classOfHydrocarbonFRSystem.equals("polyacene")){ if (multiplierValue <=3){ @@ -1401,13 +1582,11 @@ else{ throw new ComponentGenerationException("Unknown semi-trivially named hydrocarbon fused ring system"); } - + Element newGroup =new TokenEl(GROUP_EL, multiplier.getValue() + hydrocarbonFRSystem.getValue()); newGroup.addAttribute(new Attribute(VALUE_ATR, smilesSB.toString())); - newGroup.addAttribute(new Attribute(VALTYPE_ATR, SMILES_VALTYPE_VAL)); newGroup.addAttribute(new Attribute(LABELS_ATR, FUSEDRING_LABELS_VAL)); newGroup.addAttribute(new Attribute(TYPE_ATR, RING_TYPE_VAL)); newGroup.addAttribute(new Attribute(SUBTYPE_ATR, HYDROCARBONFUSEDRINGSYSTEM_EL)); - newGroup.appendChild(multiplier.getValue() + hydrocarbonFRSystem.getValue()); hydrocarbonFRSystem.getParent().replaceChild(hydrocarbonFRSystem, newGroup); multiplier.detach(); } @@ -1423,12 +1602,12 @@ * @throws ComponentGenerationException */ private void handleSuffixIrregularities(Element subOrRoot) throws ComponentGenerationException { - List suffixes = XOMTools.getChildElementsWithTagName(subOrRoot, SUFFIX_EL); + List suffixes = subOrRoot.getChildElements(SUFFIX_EL); for (Element suffix : suffixes) { String suffixValue = suffix.getValue(); if (suffixValue.equals("ic") || suffixValue.equals("ous")){ if (!n2sConfig.allowInterpretationOfAcidsWithoutTheWordAcid()) { - Node next = XOMTools.getNext(suffix); + Element next = OpsinTools.getNext(suffix); if (next == null){ throw new ComponentGenerationException("\"acid\" not found after " +suffixValue); } @@ -1437,41 +1616,39 @@ // convert quinone to dione else if (suffixValue.equals("quinone") || suffixValue.equals("quinon")){ suffix.removeAttribute(suffix.getAttribute(ADDITIONALVALUE_ATR)); - XOMTools.setTextChild(suffix, "one"); - Element multiplier = (Element) XOMTools.getPreviousSibling(suffix); - if 
(multiplier.getLocalName().equals(MULTIPLIER_EL)){ + suffix.setValue("one"); + Element multiplier = OpsinTools.getPreviousSibling(suffix); + if (multiplier.getName().equals(MULTIPLIER_EL)){ Attribute multVal = multiplier.getAttribute(VALUE_ATR); int newMultiplier = Integer.parseInt(multVal.getValue()) * 2; multVal.setValue(String.valueOf(newMultiplier)); } else{ - multiplier = new Element(MULTIPLIER_EL); + multiplier = new TokenEl(MULTIPLIER_EL, "di"); multiplier.addAttribute(new Attribute(VALUE_ATR, "2")); - multiplier.appendChild("di"); - XOMTools.insertBefore(suffix, multiplier); + OpsinTools.insertBefore(suffix, multiplier); } } else if (suffixValue.equals("ylene") || suffixValue.equals("ylen")){ suffix.removeAttribute(suffix.getAttribute(ADDITIONALVALUE_ATR)); - XOMTools.setTextChild(suffix, "yl"); - Element alk = (Element) XOMTools.getPreviousSibling(suffix, GROUP_EL); + suffix.setValue("yl"); + Element alk = OpsinTools.getPreviousSibling(suffix, GROUP_EL); if (alk.getAttribute(USABLEASJOINER_ATR)!=null){ - alk.getAttribute(USABLEASJOINER_ATR).detach(); + alk.removeAttribute(alk.getAttribute(USABLEASJOINER_ATR)); } - Element multiplier = new Element(MULTIPLIER_EL); + Element multiplier = new TokenEl(MULTIPLIER_EL, "di"); multiplier.addAttribute(new Attribute(VALUE_ATR, "2")); - multiplier.appendChild("di"); - XOMTools.insertBefore(suffix, multiplier); + OpsinTools.insertBefore(suffix, multiplier); } else if (suffixValue.equals("ylium") &&//disambiguate between ylium the charge modifying suffix and ylium the acylium suffix "acylium".equals(suffix.getAttributeValue(VALUE_ATR)) && suffix.getAttribute(SUFFIXPREFIX_ATR)==null && suffix.getAttribute(INFIX_ATR)==null){ - Element group = (Element) XOMTools.getPreviousSibling(suffix, GROUP_EL); + Element group = OpsinTools.getPreviousSibling(suffix, GROUP_EL); if (group==null || (!ACIDSTEM_TYPE_VAL.equals(group.getAttributeValue(TYPE_ATR)) && !CHALCOGENACIDSTEM_TYPE_VAL.equals(group.getAttributeValue(TYPE_ATR)) && 
!NONCARBOXYLICACID_TYPE_VAL.equals(group.getAttributeValue(TYPE_ATR)))){ - Element beforeSuffix = (Element) XOMTools.getPreviousSibling(suffix); + Element beforeSuffix = OpsinTools.getPreviousSibling(suffix); String o = beforeSuffix.getAttributeValue(SUBSEQUENTUNSEMANTICTOKEN_ATR); if (o ==null || !StringTools.endsWithCaseInsensitive(o, "o")){ if (group!=null && ARYLSUBSTITUENT_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ @@ -1486,6 +1663,34 @@ } } } + else if (suffixValue.equals("nitrolic acid") || suffixValue.equals("nitrolicacid")) { + Element precedingGroup = OpsinTools.getPreviousSibling(suffix, GROUP_EL); + if (precedingGroup == null){ + if (subOrRoot.getChildCount() != 1) { + throw new RuntimeException("OPSIN Bug: nitrolic acid not expected to have sibilings"); + } + Element precedingSubstituent = OpsinTools.getPreviousSibling(subOrRoot); + if(precedingSubstituent == null || !precedingSubstituent.getName().equals(SUBSTITUENT_EL)){ + throw new ComponentGenerationException("Expected substituent before nitrolic acid"); + } + + List existingSuffixes = precedingSubstituent.getChildElements(SUFFIX_EL); + if (existingSuffixes.size() == 1) { + if (!existingSuffixes.get(0).getValue().equals("yl")){ + throw new ComponentGenerationException("Unexpected suffix found before nitrolic acid"); + } + existingSuffixes.get(0).detach(); + for (Element child : precedingSubstituent.getChildElements()) { + child.detach(); + OpsinTools.insertBefore(suffix, child); + } + precedingSubstituent.detach(); + } + else{ + throw new ComponentGenerationException("Only the nitrolic acid case where it is preceded by an yl suffix is supported"); + } + } + } } } @@ -1495,10 +1700,14 @@ */ private void detectAlkaneFusedRingBridges(Element group) { if (ALKANESTEM_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ - Element possibleBridgeFormer = XOMTools.getNextSiblingIgnoringCertainElements(group, new String[]{UNSATURATOR_EL}); - if(possibleBridgeFormer!=null && 
possibleBridgeFormer.getLocalName().equals(BRIDGEFORMINGO_EL)){ - possibleBridgeFormer.detach(); - group.setLocalName(FUSEDRINGBRIDGE_EL); + Element unsaturator = OpsinTools.getNextSibling(group); + if (unsaturator != null && unsaturator.getName().equals(UNSATURATOR_EL)) { + Element possibleBridgeFormer = OpsinTools.getNextSiblingIgnoringCertainElements(group, new String[]{UNSATURATOR_EL}); + if(possibleBridgeFormer != null && possibleBridgeFormer.getName().equals(BRIDGEFORMINGO_EL)){ + group.setName(FUSEDRINGBRIDGE_EL); + possibleBridgeFormer.detach(); + unsaturator.detach(); + } } } } @@ -1510,9 +1719,9 @@ * @throws ComponentGenerationException */ private void processRings(Element group) throws ComponentGenerationException { - Element previous = (Element)XOMTools.getPreviousSibling(group); + Element previous = OpsinTools.getPreviousSiblingIgnoringCertainElements(group, new String[]{LOCANT_EL}); if(previous != null) { - String previousElType = previous.getLocalName(); + String previousElType = previous.getName(); if(previousElType.equals(SPIRO_EL)){ processSpiroSystem(group, previous); } else if(previousElType.equals(VONBAEYER_EL)) { @@ -1535,9 +1744,9 @@ private void processSpiroSystem(Element chainGroup, Element spiroEl) throws NumberFormatException, ComponentGenerationException { int[][] spiroDescriptors = getSpiroDescriptors(StringTools.removeDashIfPresent(spiroEl.getValue())); - Element multiplier =(Element)XOMTools.getPreviousSibling(spiroEl); + Element multiplier = OpsinTools.getPreviousSibling(spiroEl); int numberOfSpiros = 1; - if (multiplier != null && multiplier.getLocalName().equals(MULTIPLIER_EL)){ + if (multiplier != null && multiplier.getName().equals(MULTIPLIER_EL) && BASIC_TYPE_VAL.equals(multiplier.getAttributeValue(TYPE_ATR))) { numberOfSpiros = Integer.parseInt(multiplier.getAttributeValue(VALUE_ATR)); multiplier.detach(); } @@ -1693,44 +1902,39 @@ */ private void processVonBaeyerSystem(Element chainEl, Element vonBaeyerBracketEl) throws 
ComponentGenerationException { String vonBaeyerBracket = StringTools.removeDashIfPresent(vonBaeyerBracketEl.getValue()); - Element multiplier =(Element)XOMTools.getPreviousSibling(vonBaeyerBracketEl); + Element multiplier = OpsinTools.getPreviousSibling(vonBaeyerBracketEl); int numberOfRings=Integer.parseInt(multiplier.getAttributeValue(VALUE_ATR)); multiplier.detach(); int alkylChainLength; - LinkedList elementSymbolArray = new LinkedList(); - if (chainEl.getAttributeValue(VALTYPE_ATR).equals(SMILES_VALTYPE_VAL)){ - String smiles =chainEl.getAttributeValue(VALUE_ATR); - char[] smilesArray =smiles.toCharArray(); - for (int i = 0; i < smilesArray.length; i++) {//only able to interpret the SMILES that should be in an unmodified unbranched chain - char currentChar =smilesArray[i]; - if (currentChar == '['){ - if ( smilesArray[i +2]==']'){ - elementSymbolArray.add("[" +String.valueOf(smilesArray[i+1]) +"]"); - i=i+2; - } - else{ - elementSymbolArray.add("[" + String.valueOf(smilesArray[i+1]) +String.valueOf(smilesArray[i+2]) +"]"); - i=i+3; - } + Deque elementSymbolArray = new ArrayDeque(); + String smiles =chainEl.getAttributeValue(VALUE_ATR); + char[] smilesArray =smiles.toCharArray(); + for (int i = 0; i < smilesArray.length; i++) {//only able to interpret the SMILES that should be in an unmodified unbranched chain + char currentChar =smilesArray[i]; + if (currentChar == '['){ + if ( smilesArray[i +2]==']'){ + elementSymbolArray.add("[" +String.valueOf(smilesArray[i+1]) +"]"); + i=i+2; } else{ - elementSymbolArray.add(String.valueOf(currentChar)); + elementSymbolArray.add("[" + String.valueOf(smilesArray[i+1]) +String.valueOf(smilesArray[i+2]) +"]"); + i=i+3; } } - alkylChainLength=elementSymbolArray.size(); - } - else{ - throw new ComponentGenerationException("unexpected group valType: " + chainEl.getAttributeValue(VALTYPE_ATR)); + else{ + elementSymbolArray.add(String.valueOf(currentChar)); + } } + alkylChainLength=elementSymbolArray.size(); int 
totalLengthOfBridges=0; int bridgeLabelsUsed=3;//start labelling from 3 upwards //3 and 4 will be the atoms on each end of one secondary bridge, 5 and 6 for the next etc. - ArrayList> bridges = new ArrayList>(); - HashMap> bridgeLocations = new HashMap>(alkylChainLength); + List> bridges = new ArrayList>(); + Map> bridgeLocations = new HashMap>(alkylChainLength); if (vonBaeyerBracket.indexOf("-")==5){ vonBaeyerBracket = vonBaeyerBracket.substring(7, vonBaeyerBracket.length()-1);//cut off cyclo-[ and terminal ] } @@ -1746,8 +1950,8 @@ if (i > 2){//this is a secondary bridge (chain start/end locations should have been specified) i++; String coordinatesStr1; - String coordinatesStr2 = bridgeDescriptors[i].replaceAll("\\D", ""); - String[] tempArray = bridgeDescriptor.split("\\D+"); + String coordinatesStr2 = matchNonDigit.matcher(bridgeDescriptors[i]).replaceAll(""); + String[] tempArray = matchNonDigit.split(bridgeDescriptor); if (tempArray.length ==1){ //there is some ambiguity as it has not been made obvious which number/s are supposed to be the superscripted locant @@ -1870,7 +2074,7 @@ //create list of secondary bridges that need to be added //0 length bridges and the 3 main bridges are dropped - ArrayList> secondaryBridges = new ArrayList>(); + List> secondaryBridges = new ArrayList>(); for (HashMap bridge : bridges) { if(bridge.get("AtomId_Larger")!=null && bridge.get("Bridge Length")!=0){ secondaryBridges.add(bridge); @@ -1880,7 +2084,7 @@ Comparator> sortBridges= new VonBaeyerSecondaryBridgeSort(); Collections.sort(secondaryBridges, sortBridges); - ArrayList> dependantSecondaryBridges; + List> dependantSecondaryBridges; //add secondary bridges, recursively add dependent secondary bridges do{ dependantSecondaryBridges = new ArrayList>(); @@ -1977,33 +2181,44 @@ if (!n2sConfig.allowInterpretationOfAcidsWithoutTheWordAcid()) { if (group.getAttribute(FUNCTIONALIDS_ATR) !=null && (groupValue.endsWith("ic") || groupValue.endsWith("ous"))){ - Node next = 
XOMTools.getNext(group); + Element next = OpsinTools.getNext(group); if (next == null){ throw new ComponentGenerationException("\"acid\" not found after " +groupValue); } } } - + if(groupValue.equals("thiophen") || groupValue.equals("selenophen") || groupValue.equals("tellurophen")) {//thiophenol is generally phenol with an O replaced with S not thiophene with a hydroxy - Element possibleSuffix = (Element) XOMTools.getNextSibling(group); - if (!"e".equals(group.getAttributeValue(SUBSEQUENTUNSEMANTICTOKEN_ATR)) && possibleSuffix !=null && possibleSuffix.getLocalName().equals(SUFFIX_EL)) { + Element possibleSuffix = OpsinTools.getNextSibling(group); + if (!"e".equals(group.getAttributeValue(SUBSEQUENTUNSEMANTICTOKEN_ATR)) && possibleSuffix !=null && possibleSuffix.getName().equals(SUFFIX_EL)) { if (possibleSuffix.getValue().startsWith("ol")){ - Element isThisALocant =(Element)XOMTools.getPreviousSibling(group); - if (isThisALocant == null || !isThisALocant.getLocalName().equals(LOCANT_EL) || MATCH_COMMA.split(isThisALocant.getValue()).length != 1){ + Element isThisALocant = OpsinTools.getPreviousSibling(group); + if (isThisALocant == null || !isThisALocant.getName().equals(LOCANT_EL) || isThisALocant.getValue().split(",").length != 1){ throw new ComponentGenerationException(groupValue + "ol has been incorrectly interpreted as "+ groupValue+", ol instead of phenol with the oxgen replaced"); } } } } + else if(groupValue.equals("chromen")) {//chromene in IUPAC nomenclature is fully unsaturated, but sometimes is instead considered to be chromane with a front locanted double bond + Element possibleLocant = OpsinTools.getPreviousSibling(group); + if (possibleLocant != null && possibleLocant.getName().equals(LOCANT_EL) && + (possibleLocant.getValue().equals("2") || possibleLocant.getValue().equals("3"))) { + Element possibleSuffix = OpsinTools.getNextSibling(group); + if (possibleSuffix == null || possibleSuffix.getName().equals(LOCANT_EL)){//if there is a suffix assume the 
locant refers to that rather than the double bond + group.getAttribute(VALUE_ATR).setValue("O1CCCc2ccccc12"); + group.addAttribute(ADDBOND_ATR, "2 locant required"); + group.addAttribute(FRONTLOCANTSEXPECTED_ATR, "2,3"); + } + } + } else if (groupValue.equals("methylene") || groupValue.equals("methylen")) {//e.g. 3,4-methylenedioxyphenyl - Element nextSub = (Element) XOMTools.getNextSibling(group.getParent()); - if (nextSub !=null && nextSub.getLocalName().equals(SUBSTITUENT_EL) && XOMTools.getNextSibling(group)==null - && (XOMTools.getPreviousSibling(group)==null || !((Element)XOMTools.getPreviousSibling(group)).getLocalName().equals(MULTIPLIER_EL))){//not trimethylenedioxy - Elements children = nextSub.getChildElements(); + Element nextSub = OpsinTools.getNextSibling(group.getParent()); + if (nextSub !=null && nextSub.getName().equals(SUBSTITUENT_EL) && OpsinTools.getNextSibling(group)==null + && (OpsinTools.getPreviousSibling(group)==null || !OpsinTools.getPreviousSibling(group).getName().equals(MULTIPLIER_EL))){//not trimethylenedioxy + List children = nextSub.getChildElements(); if (children.size() >=2 && children.get(0).getValue().equals("di")&& children.get(1).getValue().equals("oxy")){ - XOMTools.setTextChild(group, groupValue + "dioxy"); + group.setValue(groupValue + "dioxy"); group.getAttribute(VALUE_ATR).setValue("C(O)O"); - group.getAttribute(VALTYPE_ATR).setValue(SMILES_VALTYPE_VAL); group.getAttribute(OUTIDS_ATR).setValue("2,3"); group.getAttribute(SUBTYPE_ATR).setValue(EPOXYLIKE_SUBTYPE_VAL); if (group.getAttribute(LABELS_ATR)!=null){ @@ -2015,16 +2230,16 @@ nextSub.detach(); for (int i = children.size() -1 ; i >=2; i--) { children.get(i).detach(); - XOMTools.insertAfter(group, children.get(i)); + OpsinTools.insertAfter(group, children.get(i)); } } } } else if (groupValue.equals("ethylene") || groupValue.equals("ethylen")) { - Element previous = (Element)XOMTools.getPreviousSibling(group); - if (previous!=null && 
previous.getLocalName().equals(MULTIPLIER_EL)){ + Element previous = OpsinTools.getPreviousSibling(group); + if (previous != null && previous.getName().equals(MULTIPLIER_EL)){ int multiplierValue = Integer.parseInt(previous.getAttributeValue(VALUE_ATR)); - Element possibleRoot =(Element) XOMTools.getNextSibling(group.getParent()); + Element possibleRoot = OpsinTools.getNextSibling(group.getParent()); if (possibleRoot==null && OpsinTools.getParentWordRule(group).getAttributeValue(WORDRULE_ATR).equals(WordRule.glycol.toString())){//e.g. dodecaethylene glycol StringBuilder smiles = new StringBuilder("CC"); for (int i = 1; i < multiplierValue; i++) { @@ -2040,12 +2255,12 @@ group.addAttribute(new Attribute(LABELS_ATR, NUMERIC_LABELS_VAL)); } } - else if (possibleRoot!=null && possibleRoot.getLocalName().equals(ROOT_EL)){ - Elements children = possibleRoot.getChildElements(); + else if (possibleRoot!=null && possibleRoot.getName().equals(ROOT_EL)){ + List children = possibleRoot.getChildElements(); if (children.size()==2){ Element amineMultiplier =children.get(0); Element amine =children.get(1); - if (amineMultiplier.getLocalName().equals(MULTIPLIER_EL) && (amine.getValue().equals("amine") || amine.getValue().equals("amin"))){//e.g. Triethylenetetramine + if (amineMultiplier.getName().equals(MULTIPLIER_EL) && (amine.getValue().equals("amine") || amine.getValue().equals("amin"))){//e.g. 
Triethylenetetramine if (Integer.parseInt(amineMultiplier.getAttributeValue(VALUE_ATR))!=multiplierValue +1){ throw new ComponentGenerationException("Invalid polyethylene amine!"); } @@ -2058,7 +2273,7 @@ group.getAttribute(VALUE_ATR).setValue(smiles.toString()); previous.detach(); possibleRoot.detach(); - ((Element)group.getParent()).setLocalName(ROOT_EL); + group.getParent().setName(ROOT_EL); if (group.getAttribute(LABELS_ATR)!=null){//use numeric numbering group.getAttribute(LABELS_ATR).setValue(NUMERIC_LABELS_VAL); } @@ -2070,13 +2285,12 @@ } } else{ - Element nextSub = (Element) XOMTools.getNextSibling(group.getParent()); - if (nextSub !=null && nextSub.getLocalName().equals(SUBSTITUENT_EL) && XOMTools.getNextSibling(group)==null){ - Elements children = nextSub.getChildElements(); + Element nextSub = OpsinTools.getNextSibling(group.getParent()); + if (nextSub !=null && nextSub.getName().equals(SUBSTITUENT_EL) && OpsinTools.getNextSibling(group)==null){ + List children = nextSub.getChildElements(); if (children.size() >=2 && children.get(0).getValue().equals("di")&& children.get(1).getValue().equals("oxy")){ - XOMTools.setTextChild(group, groupValue + "dioxy"); + group.setValue(groupValue + "dioxy"); group.getAttribute(VALUE_ATR).setValue("C(O)CO"); - group.getAttribute(VALTYPE_ATR).setValue(SMILES_VALTYPE_VAL); group.getAttribute(OUTIDS_ATR).setValue("2,4"); group.getAttribute(SUBTYPE_ATR).setValue(EPOXYLIKE_SUBTYPE_VAL); if (group.getAttribute(LABELS_ATR)!=null){ @@ -2088,17 +2302,17 @@ nextSub.detach(); for (int i = children.size() -1 ; i >=2; i--) { children.get(i).detach(); - XOMTools.insertAfter(group, children.get(i)); + OpsinTools.insertAfter(group, children.get(i)); } } } } } else if (groupValue.equals("propylene") || groupValue.equals("propylen")) { - Element previous = (Element)XOMTools.getPreviousSibling(group); - if (previous!=null && previous.getLocalName().equals(MULTIPLIER_EL)){ + Element previous = OpsinTools.getPreviousSibling(group); + if 
(previous!=null && previous.getName().equals(MULTIPLIER_EL)){ int multiplierValue = Integer.parseInt(previous.getAttributeValue(VALUE_ATR)); - Element possibleRoot =(Element) XOMTools.getNextSibling(group.getParent()); + Element possibleRoot = OpsinTools.getNextSibling(group.getParent()); if (possibleRoot==null && OpsinTools.getParentWordRule(group).getAttributeValue(WORDRULE_ATR).equals(WordRule.glycol.toString())){//e.g. dodecaethylene glycol StringBuilder smiles =new StringBuilder("CCC"); for (int i = 1; i < multiplierValue; i++) { @@ -2120,19 +2334,18 @@ //acridone (not codified), anthrone, phenanthrone and xanthone have the one at position 9 by default else if (groupValue.equals("anthr") || groupValue.equals("phenanthr") || groupValue.equals("acrid") || groupValue.equals("xanth") || groupValue.equals("thioxanth") || groupValue.equals("selenoxanth")|| groupValue.equals("telluroxanth")|| groupValue.equals("xanthen")) { - Element possibleLocant = (Element) XOMTools.getPreviousSibling(group); - if (possibleLocant==null || !possibleLocant.getLocalName().equals(LOCANT_EL)){//only need to give one a locant of 9 if no locant currently present - Element possibleSuffix =(Element) XOMTools.getNextSibling(group); + Element possibleLocant = OpsinTools.getPreviousSibling(group); + if (possibleLocant==null || !possibleLocant.getName().equals(LOCANT_EL)){//only need to give one a locant of 9 if no locant currently present + Element possibleSuffix = OpsinTools.getNextSibling(group); if (possibleSuffix!=null && "one".equals(possibleSuffix.getAttributeValue(VALUE_ATR))){ //Rule C-315.2 - Element newLocant =new Element(LOCANT_EL); - newLocant.appendChild("9"); - XOMTools.insertBefore(possibleSuffix, newLocant); - Element newAddedHydrogen = new Element(ADDEDHYDROGEN_EL); + Element newLocant =new TokenEl(LOCANT_EL, "9"); + OpsinTools.insertBefore(possibleSuffix, newLocant); + Element newAddedHydrogen = new TokenEl(ADDEDHYDROGEN_EL); newAddedHydrogen.addAttribute(new 
Attribute(LOCANT_ATR, "10")); - XOMTools.insertBefore(newLocant, newAddedHydrogen); + OpsinTools.insertBefore(newLocant, newAddedHydrogen); } - else if (possibleSuffix!=null && possibleSuffix.getLocalName().equals(SUFFIX_EL) && + else if (possibleSuffix!=null && possibleSuffix.getName().equals(SUFFIX_EL) && groupValue.equals("xanth") || groupValue.equals("thioxanth") || groupValue.equals("selenoxanth")|| groupValue.equals("telluroxanth")){ //diasambiguate between xanthate/xanthic acid and xanthene String suffixVal = possibleSuffix.getAttributeValue(VALUE_ATR); @@ -2143,60 +2356,40 @@ } } else if (groupValue.equals("phospho")){//is this the organic meaning (P(=O)=O) or biochemical meaning (P(=O)(O)O) - Element substituent = (Element) group.getParent(); - Element nextSubstituent = (Element) XOMTools.getNextSibling(substituent); - if (nextSubstituent !=null){ - Element nextGroup = nextSubstituent.getFirstChildElement(GROUP_EL); - String type = nextGroup.getAttributeValue(TYPE_ATR); - String subType = nextGroup.getAttributeValue(SUBTYPE_ATR); - if (nextGroup !=null && (type.equals(AMINOACID_TYPE_VAL) || + Element wordRule = OpsinTools.getParentWordRule(group); + for (Element otherGroup : OpsinTools.getDescendantElementsWithTagName(wordRule, GROUP_EL)) { + String type = otherGroup.getAttributeValue(TYPE_ATR); + String subType = otherGroup.getAttributeValue(SUBTYPE_ATR); + if (type.equals(AMINOACID_TYPE_VAL) || type.equals(CARBOHYDRATE_TYPE_VAL) || BIOCHEMICAL_SUBTYPE_VAL.equals(subType) || (YLFORACYL_SUBTYPE_VAL.equals(subType) && - ("glycol".equals(nextGroup.getValue()) || "diglycol".equals(nextGroup.getValue()))) - )){ + ("glycol".equals(otherGroup.getValue()) || "diglycol".equals(otherGroup.getValue())) + ) + ) { group.getAttribute(VALUE_ATR).setValue("-P(=O)(O)O"); group.addAttribute(new Attribute(USABLEASJOINER_ATR, "yes")); - } - } - - } - else if (groupValue.equals("aspart") || groupValue.equals("glutam")){//aspartyl and glutamyl typically mean 
alpha-aspartyl/alpha-glutamyl - if (group.getAttributeValue(SUBTYPE_ATR).equals(ENDINIC_SUBTYPE_VAL)){ - Element yl = (Element) XOMTools.getNextSibling(group); - if (yl.getAttributeValue(VALUE_ATR).equals("yl")){ - group.removeAttribute(group.getAttribute(SUFFIXAPPLIESTO_ATR)); - if (groupValue.equals("aspart")){ - group.getAttribute("labels").setValue("/2,alpha/3,beta/4,gamma///1/"); - group.getAttribute(VALUE_ATR).setValue("N[C@@H](CC(O)=O)C=O"); - group.addAttribute(new Attribute(OUTIDS_ATR, "7")); - } - else { - group.getAttribute("labels").setValue("/2,alpha/3,beta/4,gamma/5,delta///1/"); - group.getAttribute(VALUE_ATR).setValue("N[C@@H](CCC(O)=O)C=O"); - group.addAttribute(new Attribute(OUTIDS_ATR, "8")); - } - yl.detach(); + break; } } } else if (groupValue.equals("hydrogen")){ - Element hydrogenParentEl = (Element) group.getParent(); - Element nextSubOrRoot = (Element) XOMTools.getNextSibling(hydrogenParentEl); + Element hydrogenParentEl = group.getParent(); + Element nextSubOrRoot = OpsinTools.getNextSibling(hydrogenParentEl); if (nextSubOrRoot!=null){ - Element possibleSuitableAteGroup = (Element) nextSubOrRoot.getChild(0); - if (!possibleSuitableAteGroup.getLocalName().equals(GROUP_EL) || !NONCARBOXYLICACID_TYPE_VAL.equals(possibleSuitableAteGroup.getAttributeValue(TYPE_ATR))){ + Element possibleSuitableAteGroup = nextSubOrRoot.getChild(0); + if (!possibleSuitableAteGroup.getName().equals(GROUP_EL) || !NONCARBOXYLICACID_TYPE_VAL.equals(possibleSuitableAteGroup.getAttributeValue(TYPE_ATR))){ throw new ComponentGenerationException("Hydrogen is not meant as a substituent in this context!"); } - Element possibleMultiplier = (Element) XOMTools.getPreviousSibling(group); + Element possibleMultiplier = OpsinTools.getPreviousSibling(group); String multiplier = "1"; - if (possibleMultiplier!=null && possibleMultiplier.getLocalName().equals(MULTIPLIER_EL)){ + if (possibleMultiplier!=null && possibleMultiplier.getName().equals(MULTIPLIER_EL)){ multiplier = 
possibleMultiplier.getAttributeValue(VALUE_ATR); possibleMultiplier.detach(); } possibleSuitableAteGroup.addAttribute(new Attribute(NUMBEROFFUNCTIONALATOMSTOREMOVE_ATR, multiplier)); group.detach(); - Elements childrenToMove = hydrogenParentEl.getChildElements(); + List childrenToMove = hydrogenParentEl.getChildElements(); for (int i = childrenToMove.size() -1 ; i >=0; i--) { childrenToMove.get(i).detach(); nextSubOrRoot.insertChild(childrenToMove.get(i), 0); @@ -2206,69 +2399,78 @@ } else if (groupValue.equals("acryl")){ if (SIMPLESUBSTITUENT_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ - Element nextEl = (Element) XOMTools.getNext(group); + Element nextEl = OpsinTools.getNext(group); if (nextEl!=null && nextEl.getValue().equals("amid")){ throw new ComponentGenerationException("amide in acrylamide is not [NH2-]"); } } } else if (groupValue.equals("azo") || groupValue.equals("azoxy") || groupValue.equals("nno-azoxy") || groupValue.equals("non-azoxy") || groupValue.equals("onn-azoxy") || groupValue.equals("diazoamino") || groupValue.equals("hydrazo") ){ - Element enclosingSub = (Element) group.getParent(); - Element next = XOMTools.getNextSiblingIgnoringCertainElements(enclosingSub, new String[]{HYPHEN_EL}); - if (next==null && XOMTools.getPreviousSibling(enclosingSub)==null){//e.g. [(E)-NNO-azoxy]benzene - next = XOMTools.getNextSiblingIgnoringCertainElements((Element) enclosingSub.getParent(), new String[]{HYPHEN_EL}); - } - if (next!=null && next.getLocalName().equals(ROOT_EL)){ - if (!(((Element)next.getChild(0)).getLocalName().equals(MULTIPLIER_EL))){ - List suffixes = XOMTools.getChildElementsWithTagName(next, SUFFIX_EL); + Element enclosingSub = group.getParent(); + Element next = OpsinTools.getNextSiblingIgnoringCertainElements(enclosingSub, new String[]{HYPHEN_EL}); + if (next==null && OpsinTools.getPreviousSibling(enclosingSub) == null){//e.g. 
[(E)-NNO-azoxy]benzene + next = OpsinTools.getNextSiblingIgnoringCertainElements(enclosingSub.getParent(), new String[]{HYPHEN_EL}); + } + if (next!=null && next.getName().equals(ROOT_EL)){ + if (!(next.getChild(0).getName().equals(MULTIPLIER_EL))){ + List suffixes = next.getChildElements(SUFFIX_EL); if (suffixes.size()==0){//only case without locants is handled so far. suffixes only apply to one of the fragments rather than both!!! - Element newMultiplier = new Element(MULTIPLIER_EL); + Element newMultiplier = new TokenEl(MULTIPLIER_EL); newMultiplier.addAttribute(new Attribute(VALUE_ATR, "2")); next.insertChild(newMultiplier, 0); - Element interSubstituentHyphen = (Element) XOMTools.getPrevious(group); - if (interSubstituentHyphen!=null && !interSubstituentHyphen.getLocalName().equals(HYPHEN_EL)){//prevent implicit bracketting - XOMTools.insertAfter(interSubstituentHyphen, new Element(HYPHEN_EL)); + Element interSubstituentHyphen = OpsinTools.getPrevious(group); + if (interSubstituentHyphen!=null && !interSubstituentHyphen.getName().equals(HYPHEN_EL)){//prevent implicit bracketting + OpsinTools.insertAfter(interSubstituentHyphen, new TokenEl(HYPHEN_EL)); } } } } } else if (groupValue.equals("coenzyme a") || groupValue.equals("coa")){ - Element enclosingSubOrRoot = (Element) group.getParent(); - Element previous = (Element) XOMTools.getPreviousSibling(enclosingSubOrRoot); - if (previous!=null){ - List groups = XOMTools.getDescendantElementsWithTagName(previous, GROUP_EL); - if (groups.size()>0){ - Element possibleAcid = groups.get(groups.size()-1); + Element enclosingSubOrRoot = group.getParent(); + Element previous = OpsinTools.getPreviousSibling(enclosingSubOrRoot); + if (previous != null){ + List groups = OpsinTools.getDescendantElementsWithTagName(previous, GROUP_EL); + if (groups.size() > 0){ + Element possibleAcid = groups.get(groups.size() - 1); if (ACIDSTEM_TYPE_VAL.equals(possibleAcid.getAttributeValue(TYPE_ATR))){ - if 
(possibleAcid.getAttribute(SUFFIXAPPLIESTO_ATR)!=null){//multi acid. yl should be one oyl and the rest carboxylic acids - Element suffix = (Element) XOMTools.getNextSibling(possibleAcid, SUFFIX_EL); - if (suffix.getAttribute(ADDITIONALVALUE_ATR)==null){ + if (possibleAcid.getAttribute(SUFFIXAPPLIESTO_ATR) != null){//multi acid. yl should be one oyl and the rest carboxylic acids + Element suffix = OpsinTools.getNextSibling(possibleAcid, SUFFIX_EL); + if (suffix.getAttribute(ADDITIONALVALUE_ATR) == null){ suffix.addAttribute(new Attribute(ADDITIONALVALUE_ATR, "ic")); } } String subType = possibleAcid.getAttributeValue(SUBTYPE_ATR); if (subType.equals(YLFORYL_SUBTYPE_VAL) || subType.equals(YLFORNOTHING_SUBTYPE_VAL)){ - possibleAcid.getAttribute(SUBTYPE_ATR).setValue(YLFORACYL_SUBTYPE_VAL);//yl always means an acyl when next to coenzyme A + possibleAcid.getAttribute(SUBTYPE_ATR).setValue(YLFORACYL_SUBTYPE_VAL);//yl always means an acyl when next to coenzyme A } } } } - //locanted substitution onto Coenzyme A is rarely intended, so put it in a bracket to disfavour it - Element newBracket = new Element(BRACKET_EL); - XOMTools.insertAfter(enclosingSubOrRoot, newBracket); - enclosingSubOrRoot.detach(); - newBracket.appendChild(enclosingSubOrRoot); + //locanted substitution onto Coenzyme A is rarely intended, so put prior content into a bracket to disfavour it + Element enclosingBracketOrWord = enclosingSubOrRoot.getParent(); + int indexOfCoa = enclosingBracketOrWord.indexOf(enclosingSubOrRoot); + if (indexOfCoa > 0) { + Element newBracket = new GroupingEl(BRACKET_EL); + List precedingElements = enclosingBracketOrWord.getChildElements(); + for (int i = 0; i < indexOfCoa; i++) { + Element precedingElement = precedingElements.get(i); + precedingElement.detach(); + newBracket.addChild(precedingElement); + } + OpsinTools.insertBefore(enclosingSubOrRoot, newBracket); + } } - else if (groupValue.equals("sphinganine") || groupValue.equals("icosasphinganine") || 
groupValue.equals("eicosasphinganine") || groupValue.equals("phytosphingosine") || groupValue.equals("sphingosine")){ - Element enclosingSubOrRoot = (Element) group.getParent(); - Element previous = (Element) XOMTools.getPreviousSibling(enclosingSubOrRoot); + else if (groupValue.equals("sphinganine") || groupValue.equals("icosasphinganine") || groupValue.equals("eicosasphinganine") || groupValue.equals("phytosphingosine") || groupValue.equals("sphingosine") + || groupValue.equals("sphinganin") || groupValue.equals("icosasphinganin") || groupValue.equals("eicosasphinganin") || groupValue.equals("phytosphingosin") || groupValue.equals("sphingosin")){ + Element enclosingSubOrRoot = group.getParent(); + Element previous = OpsinTools.getPreviousSibling(enclosingSubOrRoot); if (previous!=null){ - List groups = XOMTools.getDescendantElementsWithTagName(previous, GROUP_EL); + List groups = OpsinTools.getDescendantElementsWithTagName(previous, GROUP_EL); if (groups.size()>0){ Element possibleAcid = groups.get(groups.size()-1); if (ALKANESTEM_SUBTYPE_VAL.equals(possibleAcid.getAttributeValue(SUBTYPE_ATR))){ - List inlineSuffixes = XOMTools.getChildElementsWithTagNameAndAttribute((Element) possibleAcid.getParent(), SUFFIX_EL, TYPE_ATR, INLINE_TYPE_VAL); + List inlineSuffixes = OpsinTools.getChildElementsWithTagNameAndAttribute(possibleAcid.getParent(), SUFFIX_EL, TYPE_ATR, INLINE_TYPE_VAL); if (inlineSuffixes.size()==1 && inlineSuffixes.get(0).getAttributeValue(VALUE_ATR).equals("yl")){ inlineSuffixes.get(0).getAttribute(VALUE_ATR).setValue("oyl");//yl on a systematic acid next to a fatty acid means acyl //c.f. 
Nomenclature of Lipids 1976, Appendix A, note a @@ -2280,10 +2482,10 @@ else if (groupValue.equals("sel")){ //check that it is not "selenium" if (HETEROSTEM_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && group.getAttribute(SUBSEQUENTUNSEMANTICTOKEN_ATR) ==null){ - Element unsaturator = (Element) XOMTools.getNextSibling(group); - if (unsaturator !=null && unsaturator.getLocalName().equals(UNSATURATOR_EL) && unsaturator.getValue().equals("en") && group.getAttribute(SUBSEQUENTUNSEMANTICTOKEN_ATR) ==null){ - Element ium = (Element) XOMTools.getNextSibling(unsaturator); - if (ium !=null && ium.getLocalName().equals(SUFFIX_EL) && ium.getValue().equals("ium")){ + Element unsaturator = OpsinTools.getNextSibling(group); + if (unsaturator !=null && unsaturator.getName().equals(UNSATURATOR_EL) && unsaturator.getValue().equals("en") && group.getAttribute(SUBSEQUENTUNSEMANTICTOKEN_ATR) ==null){ + Element ium = OpsinTools.getNextSibling(unsaturator); + if (ium !=null && ium.getName().equals(SUFFIX_EL) && ium.getValue().equals("ium")){ throw new ComponentGenerationException("selenium does not indicate a chain of selenium atoms with a double bond and a positive charge"); } } @@ -2291,10 +2493,10 @@ } else if ((groupValue.equals("keto") || groupValue.equals("aldehydo")) && SIMPLESUBSTITUENT_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ //check for case where this is specifying the open chain form of a ketose/aldose - Element previousEl = (Element) XOMTools.getPreviousSibling(group); - if (previousEl ==null || !previousEl.getLocalName().equals(LOCANT_EL) || groupValue.equals("aldehydo")){ - Element parentSubstituent = (Element) group.getParent(); - Element nextSubOrRoot = (Element) XOMTools.getNextSibling(parentSubstituent); + Element previousEl = OpsinTools.getPreviousSibling(group); + if (previousEl ==null || !previousEl.getName().equals(LOCANT_EL) || groupValue.equals("aldehydo")){ + Element parentSubstituent = group.getParent(); + Element nextSubOrRoot 
= OpsinTools.getNextSibling(parentSubstituent); Element parentOfCarbohydate = nextSubOrRoot; Element carbohydrate = null; while (parentOfCarbohydate != null){ @@ -2303,27 +2505,39 @@ carbohydrate = possibleCarbohydrate; break; } - parentOfCarbohydate = (Element) XOMTools.getNextSibling(parentOfCarbohydate); + parentOfCarbohydate = OpsinTools.getNextSibling(parentOfCarbohydate); } if (carbohydrate != null) { - if (XOMTools.getChildElementsWithTagName(parentOfCarbohydate, CARBOHYDRATERINGSIZE_EL).size() > 0){ + if (parentOfCarbohydate.getChildElements(CARBOHYDRATERINGSIZE_EL).size() > 0){ throw new ComponentGenerationException("Carbohydrate has a specified ring size but " + groupValue + " indicates the open chain form!"); } + for (Element suffix : parentOfCarbohydate.getChildElements(SUFFIX_EL)) { + if ("yl".equals(suffix.getAttributeValue(VALUE_ATR))) { + throw new ComponentGenerationException("Carbohydrate appears to be a glycosyl, but " + groupValue + " indicates the open chain form!"); + } + } + Element alphaOrBetaLocantEl = OpsinTools.getPreviousSiblingIgnoringCertainElements(carbohydrate, new String[]{STEREOCHEMISTRY_EL}); + if (alphaOrBetaLocantEl != null && alphaOrBetaLocantEl.getName().equals(LOCANT_EL) ){ + String value = alphaOrBetaLocantEl.getValue(); + if (value.equals("alpha") || value.equals("beta") || value.equals("alpha,beta") || value.equals("beta,alpha")){ + throw new ComponentGenerationException("Carbohydrate has alpha/beta anomeric form but " + groupValue + " indicates the open chain form!"); + } + } group.detach(); - Elements childrenToMove = parentSubstituent.getChildElements(); + List childrenToMove = parentSubstituent.getChildElements(); for (int i = childrenToMove.size() -1 ; i >=0; i--) { Element el = childrenToMove.get(i); - if (!el.getLocalName().equals(HYPHEN_EL)){ + if (!el.getName().equals(HYPHEN_EL)){ el.detach(); nextSubOrRoot.insertChild(el, 0); } } parentSubstituent.detach(); - String carbohydrateAdditionValue = 
carbohydrate.getAttributeValue(ADDITIONALVALUE_ATR); - //OPSIN assumes a few trival names are more likely to describe the cyclic form. additonalValue contains the SMILES for the acyclic form - if (carbohydrateAdditionValue != null){ - if (carbohydrateAdditionValue.equals("n/a")){ - throw new ComponentGenerationException(carbohydrate.getValue() + " can only describe the cyclic form but " + groupValue + " indicates the open chain form!"); + if (RING_SUBTYPE_VAL.equals(carbohydrate.getAttributeValue(SUBTYPE_ATR))) { + String carbohydrateAdditionValue = carbohydrate.getAttributeValue(ADDITIONALVALUE_ATR); + //OPSIN assumes a few trivial names are more likely to describe the cyclic form. additionalValue contains the SMILES for the acyclic form + if (carbohydrateAdditionValue == null){ + throw new ComponentGenerationException(carbohydrate.getValue() + " can only describe the cyclic form but " + groupValue + " indicates the open chain form!"); } carbohydrate.getAttribute(VALUE_ATR).setValue(carbohydrateAdditionValue); } @@ -2334,25 +2548,25 @@ } } else if (groupValue.equals("bor") || groupValue.equals("antimon") - || groupValue.equals("arsen") || groupValue.equals("phosphor") || groupValue.equals("phosphate") + || groupValue.equals("arsen") || groupValue.equals("phosphor") || groupValue.equals("phosphate") || groupValue.equals("phosphat") || groupValue.equals("silicicacid") || groupValue.equals("silicic acid") - || groupValue.equals("silicate")){//fluoroboric acid/fluoroborate are trivial rather than systematic; tetra(fooyl)borate is inorganic + || groupValue.equals("silicate") || groupValue.equals("silicat")){//fluoroboric acid/fluoroborate are trivial rather than systematic; tetra(fooyl)borate is inorganic Element suffix = null; Boolean isAcid = null; if (groupValue.endsWith("acid")){ - if (XOMTools.getNext(group) == null){ + if (OpsinTools.getNext(group) == null){ isAcid = true; } } - else if (groupValue.endsWith("ate")){ - if (XOMTools.getNext(group) == null){ + else 
if (groupValue.endsWith("ate") || groupValue.endsWith("at")){ + if (OpsinTools.getNext(group) == null){ isAcid = false; } } else{ - suffix = (Element) XOMTools.getNextSibling(group); - if (suffix != null && suffix.getLocalName().equals(SUFFIX_EL) && - suffix.getAttribute(INFIX_ATR) == null && XOMTools.getNext(suffix) == null){ + suffix = OpsinTools.getNextSibling(group); + if (suffix != null && suffix.getName().equals(SUFFIX_EL) && + suffix.getAttribute(INFIX_ATR) == null && OpsinTools.getNext(suffix) == null){ String suffixValue = suffix.getAttributeValue(VALUE_ATR); if (suffixValue.equals("ic")){ isAcid = true; @@ -2363,12 +2577,12 @@ } } if (isAcid != null){//check for inorganic interpretation - Element substituent = (Element) XOMTools.getPreviousSibling(group.getParent()); - if (substituent !=null && (substituent.getLocalName().equals(SUBSTITUENT_EL) || substituent.getLocalName().equals(BRACKET_EL))){ - Elements children = substituent.getChildElements(); + Element substituent = OpsinTools.getPreviousSibling(group.getParent()); + if (substituent !=null && (substituent.getName().equals(SUBSTITUENT_EL) || substituent.getName().equals(BRACKET_EL))){ + List children = substituent.getChildElements(); Element firstChild = children.get(0); boolean matched = false; - if (children.size() ==1 && firstChild.getLocalName().equals(GROUP_EL) && (firstChild.getValue().equals("fluoro") || firstChild.getValue().equals("fluor"))){ + if (children.size() ==1 && firstChild.getName().equals(GROUP_EL) && (firstChild.getValue().equals("fluoro") || firstChild.getValue().equals("fluor"))){ if (groupValue.equals("bor")) { group.getAttribute(VALUE_ATR).setValue(isAcid ? 
"F[B-](F)(F)F.[H+]" : "F[B-](F)(F)F"); matched = true; @@ -2385,26 +2599,29 @@ substituent.detach(); } } - else if (firstChild.getLocalName().equals(MULTIPLIER_EL)) { - String multiplerVal = firstChild.getAttributeValue(VALUE_ATR); + else if (firstChild.getName().equals(MULTIPLIER_EL)) { + String multiplierVal = firstChild.getAttributeValue(VALUE_ATR); - if (groupValue.equals("bor") && multiplerVal.equals("4")) { - group.getAttribute(VALUE_ATR).setValue(isAcid ? "[B-].[H+]" :"[B-]"); - matched = true; + if (groupValue.equals("bor")){ + if (multiplierVal.equals("4") || (multiplierVal.equals("3") && OpsinTools.getPreviousSibling(substituent) != null)) { + //tri case allows organotrifluoroborates + group.getAttribute(VALUE_ATR).setValue(isAcid ? "[B-].[H+]" :"[B-]"); + matched = true; + } } - else if (groupValue.equals("antimon") && multiplerVal.equals("6")) { + else if (groupValue.equals("antimon") && multiplierVal.equals("6")) { group.getAttribute(VALUE_ATR).setValue(isAcid ? "[Sb-].[H+]" :"[Sb-]"); matched = true; } - else if (groupValue.equals("arsen") && multiplerVal.equals("6")) { + else if (groupValue.equals("arsen") && multiplierVal.equals("6")) { group.getAttribute(VALUE_ATR).setValue(isAcid ? "[As-].[H+]" :"[As-]"); matched = true; } - else if (groupValue.startsWith("phosph") && multiplerVal.equals("6")) { + else if (groupValue.startsWith("phosph") && multiplierVal.equals("6")) { group.getAttribute(VALUE_ATR).setValue(isAcid ? "[P-].[H+]" :"[P-]"); matched = true; } - else if (groupValue.startsWith("silic") && multiplerVal.equals("6")) { + else if (groupValue.startsWith("silic") && multiplierVal.equals("6")) { group.getAttribute(VALUE_ATR).setValue(isAcid ? 
"[Si|6-2].[H+].[H+]" :"[Si|6-2]"); matched = true; } @@ -2433,6 +2650,43 @@ } } } + else if (ENDINIC_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && AMINOACID_TYPE_VAL.equals(group.getAttributeValue(TYPE_ATR))) { + //aspartyl and glutamyl typically mean alpha-aspartyl/alpha-glutamyl + String[] suffixAppliesTo = group.getAttributeValue(SUFFIXAPPLIESTO_ATR).split(","); + if (suffixAppliesTo.length == 2) { + Element yl = OpsinTools.getNextSibling(group); + if (yl.getAttributeValue(VALUE_ATR).equals("yl")) { + if (yl.getAttribute(ADDITIONALVALUE_ATR) == null){ + yl.addAttribute(new Attribute(ADDITIONALVALUE_ATR, "ic")); + } + } + } + } else if (SALTCOMPONENT_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))) { + Element parse = null; + Element tempParent = group.getParent(); + while (tempParent != null) { + parse = tempParent; + tempParent = tempParent.getParent(); + } + if (parse.getChildCount() <= 1) { + throw new ComponentGenerationException("Group expected to be part of a salt but only one component found. Could be a class of compound: " + groupValue); + } + if (groupValue.length() > 0) { + //e.g. 
2HCl + char firstChar = groupValue.charAt(0); + if (firstChar >= '1' && firstChar <= '9') { + Element shouldntBeAmultiplier= OpsinTools.getPreviousSibling(group); + if (shouldntBeAmultiplier != null && shouldntBeAmultiplier.getName().equals(MULTIPLIER_EL)) { + throw new ComponentGenerationException("Unepxected multiplier found before: " + groupValue); + } + Element multiplier = new TokenEl(MULTIPLIER_EL, String.valueOf(firstChar)); + multiplier.addAttribute(TYPE_ATR, BASIC_TYPE_VAL); + multiplier.addAttribute(VALUE_ATR, String.valueOf(firstChar)); + OpsinTools.insertBefore(group, multiplier); + group.setValue(groupValue.substring(1)); + } + } + } } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessor.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessor.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessor.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessor.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,24 +1,25 @@ package uk.ac.cam.ch.wwmm.opsin; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.Comparator; +import java.util.Deque; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Locale; +import java.util.Map; import java.util.Set; -import java.util.Stack; import java.util.regex.Matcher; import java.util.regex.Pattern; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; -import nu.xom.Attribute; -import nu.xom.Element; -import nu.xom.Elements; -import nu.xom.Node; - /**Performs structure-aware destructive procedural parsing on parser results. 
* * @author dl387 @@ -26,18 +27,18 @@ */ class ComponentProcessor { - private final static Pattern matchAddedHydrogenBracket =Pattern.compile("[\\[\\(\\{]([^\\[\\(\\{]*)H[\\]\\)\\}]"); - private final static Pattern matchElementSymbolOrAminoAcidLocant = Pattern.compile("[A-Z][a-z]?'*(\\d+[a-z]?'*)?"); - private final static Pattern matchChalcogenReplacement= Pattern.compile("thio|seleno|telluro"); - private final static Pattern matchInlineSuffixesThatAreAlsoGroups = Pattern.compile("carbon|oxy|sulfen|sulfin|sulfon|selenen|selenin|selenon|telluren|tellurin|telluron"); - private final static String[] traditionalAlkanePositionNames =new String[]{"alpha", "beta", "gamma", "delta", "epsilon", "zeta"}; + private static final Pattern matchAddedHydrogenBracket =Pattern.compile("[\\[\\(\\{]([^\\[\\(\\{]*)H[\\]\\)\\}]"); + private static final Pattern matchElementSymbolOrAminoAcidLocant = Pattern.compile("[A-Z][a-z]?'*(\\d+[a-z]?'*)?"); + private static final Pattern matchChalcogenReplacement= Pattern.compile("thio|seleno|telluro"); + private static final Pattern matchGroupsThatAreAlsoInlineSuffixes = Pattern.compile("carbon|oxy|sulfen|sulfin|sulfon|selenen|selenin|selenon|telluren|tellurin|telluron"); + private static final String[] traditionalAlkanePositionNames =new String[]{"alpha", "beta", "gamma", "delta", "epsilon", "zeta"}; - private final SuffixRules suffixRules; + private final FunctionalReplacement functionalReplacement; + private final SuffixApplier suffixApplier; private final BuildState state; - private final Element parse; //rings that look like HW rings but have other meanings. For the HW like inorganics the true meaning is given - private static final HashMap specialHWRings = new HashMap(); + private static final Map specialHWRings = new HashMap(); static{ //The first entry of the array is a special instruction e.g. blocked or saturated. 
The correct order of the heteroatoms follows //terminal e is ignored from all of the keys as it is optional in the input name @@ -69,6 +70,7 @@ specialHWRings.put("tellurazolin", new String[]{"","Te","C","N","C","C"}); specialHWRings.put("oxoxolan", new String[]{"","O","C","O","C","C"}); + specialHWRings.put("oxoxol", new String[]{"","O","C","O","C","C"}); specialHWRings.put("oxoxan", new String[]{"","O","C","C","O","C","C"}); specialHWRings.put("oxoxin", new String[]{"","O","C","C","O","C","C"}); @@ -77,10 +79,10 @@ specialHWRings.put("borthiin", new String[]{"saturated","S","B","S","B","S","B"}); } - public ComponentProcessor(SuffixRules suffixRules, BuildState state, Element parse) { - this.suffixRules = suffixRules; + ComponentProcessor(BuildState state, SuffixApplier suffixApplier) { this.state = state; - this.parse = parse; + this.suffixApplier = suffixApplier; + this.functionalReplacement = new FunctionalReplacement(state); } /** @@ -88,34 +90,34 @@ * At this stage one can expect all substituents/roots to have at least 1 group. * Multiple groups are present in, for example, fusion nomenclature. By the end of this function there will be exactly 1 group * associated with each substituent/root. 
Multiplicative nomenclature can result in there being multiple roots + * @param parse * @throws ComponentGenerationException * @throws StructureBuildingException */ - void processParse() throws ComponentGenerationException, StructureBuildingException { - List words =XOMTools.getDescendantElementsWithTagName(parse, WORD_EL); + void processParse(Element parse) throws ComponentGenerationException, StructureBuildingException { + List words =OpsinTools.getDescendantElementsWithTagName(parse, WORD_EL); int wordCount =words.size(); for (int i = wordCount -1; i>=0; i--) { - Element word =words.get(i); + Element word = words.get(i); String wordRule = OpsinTools.getParentWordRule(word).getAttributeValue(WORDRULE_EL); state.currentWordRule = WordRule.valueOf(wordRule); if (word.getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())){ continue;//functionalTerms are handled on a case by case basis by wordRules } - List roots = XOMTools.getDescendantElementsWithTagName(word, ROOT_EL); - if (roots.size() >1){ - throw new ComponentGenerationException("Multiple roots, but only 0 or 1 were expected. Found: " +roots.size()); + List roots = OpsinTools.getDescendantElementsWithTagName(word, ROOT_EL); + if (roots.size() > 1){ + throw new ComponentGenerationException("Multiple roots, but only 0 or 1 were expected. 
Found: " + roots.size()); } - List substituents = XOMTools.getDescendantElementsWithTagName(word, SUBSTITUENT_EL); + List substituents = OpsinTools.getDescendantElementsWithTagName(word, SUBSTITUENT_EL); List substituentsAndRoot = OpsinTools.combineElementLists(substituents, roots); - List brackets = XOMTools.getDescendantElementsWithTagName(word, BRACKET_EL); - List substituentsAndRootAndBrackets =OpsinTools.combineElementLists(substituentsAndRoot, brackets); - List groups = XOMTools.getDescendantElementsWithTagName(word, GROUP_EL); + List brackets = OpsinTools.getDescendantElementsWithTagName(word, BRACKET_EL); + List substituentsAndRootAndBrackets = OpsinTools.combineElementLists(substituentsAndRoot, brackets); + List groups = OpsinTools.getDescendantElementsWithTagName(word, GROUP_EL); for (Element group : groups) { Fragment thisFrag = resolveGroup(state, group); processChargeAndOxidationNumberSpecification(group, thisFrag);//e.g. mercury(2+) or mercury(II) - state.xmlFragmentMap.put(group, thisFrag); } for (Element subOrRoot : substituentsAndRoot) { @@ -123,26 +125,13 @@ processCarbohydrates(subOrRoot);//e.g. 
glucopyranose (needs to be done before determineLocantMeaning to cope with alpha,beta for undefined anomer stereochemistry) } - for (int j = substituents.size() -1; j >=0; j--) { - Element substituent = substituents.get(j); - boolean removed = removeAndMoveToAppropriateGroupIfHydroSubstituent(substituent);//this REMOVES a substituent just containing hydro/perhydro elements and moves these elements in front of an appropriate ring - if (!removed){ - removed = removeAndMoveToAppropriateGroupIfSubtractivePrefix(substituent); - } - if (removed){ - substituents.remove(j); - substituentsAndRoot.remove(substituent); - substituentsAndRootAndBrackets.remove(substituent); - } - } - - Element finalSubOrRootInWord =(Element) word.getChild(word.getChildElements().size()-1); - while (!finalSubOrRootInWord.getLocalName().equals(ROOT_EL) && !finalSubOrRootInWord.getLocalName().equals(SUBSTITUENT_EL)){ - List children = XOMTools.getChildElementsWithTagNames(finalSubOrRootInWord, new String[]{ROOT_EL, SUBSTITUENT_EL, BRACKET_EL}); - if (children.size()==0){ + Element finalSubOrRootInWord = word.getChild(word.getChildCount() - 1); + while (!finalSubOrRootInWord.getName().equals(ROOT_EL) && !finalSubOrRootInWord.getName().equals(SUBSTITUENT_EL)){ + List children = OpsinTools.getChildElementsWithTagNames(finalSubOrRootInWord, new String[]{ROOT_EL, SUBSTITUENT_EL, BRACKET_EL}); + if (children.size() == 0){ throw new ComponentGenerationException("Unable to find finalSubOrRootInWord"); } - finalSubOrRootInWord = children.get(children.size()-1); + finalSubOrRootInWord = children.get(children.size() - 1); } for (Element subOrRootOrBracket : substituentsAndRootAndBrackets) { @@ -153,14 +142,35 @@ processMultipliers(subOrRoot); detectConjunctiveSuffixGroups(subOrRoot, groups); matchLocantsToDirectFeatures(subOrRoot); - - Elements groupsOfSubOrRoot = subOrRoot.getChildElements(GROUP_EL); - Element lastGroupInSubOrRoot =groupsOfSubOrRoot.get(groupsOfSubOrRoot.size()-1); - 
preliminaryProcessSuffixes(lastGroupInSubOrRoot, XOMTools.getChildElementsWithTagName(subOrRoot, SUFFIX_EL)); + + List groupsOfSubOrRoot = subOrRoot.getChildElements(GROUP_EL); + if (groupsOfSubOrRoot.size() > 0) { + Element lastGroupInSubOrRoot =groupsOfSubOrRoot.get(groupsOfSubOrRoot.size() - 1); + preliminaryProcessSuffixes(lastGroupInSubOrRoot, subOrRoot.getChildElements(SUFFIX_EL)); + } + } + for (int j = substituents.size() -1; j >=0; j--) { + Element substituent = substituents.get(j); + if (substituent.getChildElements(GROUP_EL).size() == 0) { + boolean removed = removeAndMoveToAppropriateGroupIfHydroSubstituent(substituent);//this REMOVES a substituent just containing hydro/perhydro elements and moves these elements in front of an appropriate ring + if (!removed){ + removed = removeAndMoveToAppropriateGroupIfSubtractivePrefix(substituent); + } + if (!removed){ + removed = removeAndMoveToAppropriateGroupIfRingBridge(substituent); + } + if (!removed){ + throw new RuntimeException("OPSIN Bug: Encountered substituent with no group!: " + substituent.toXML() ); + } + substituents.remove(j); + substituentsAndRoot.remove(substituent); + substituentsAndRootAndBrackets.remove(substituent); + } } - FunctionalReplacement.processAcidReplacingFunctionalClassNomenclature(state, finalSubOrRootInWord, word); + + functionalReplacement.processAcidReplacingFunctionalClassNomenclature(finalSubOrRootInWord, word); - if (FunctionalReplacement.processPrefixFunctionalReplacementNomenclature(state, groups, substituents)){//true if functional replacement performed, 1 or more substituents will have been removed + if (functionalReplacement.processPrefixFunctionalReplacementNomenclature(groups, substituents)){//true if functional replacement performed, 1 or more substituents will have been removed substituentsAndRoot = OpsinTools.combineElementLists(substituents, roots); substituentsAndRootAndBrackets =OpsinTools.combineElementLists(substituentsAndRoot, brackets); } @@ -181,32 +191,37 
@@ handleMultiRadicals(subOrRoot); } - //System.out.println(new XOMFormatter().elemToString(elem)); addImplicitBracketsToAminoAcids(groups, brackets); - findAndStructureImplictBrackets(substituents, brackets); + for (Element substituent : substituents) { + matchLocantsToIndirectFeatures(substituent); + addImplicitBracketsWhenSubstituentHasTwoLocants(substituent, brackets); + implicitlyBracketToPreviousSubstituentIfAppropriate(substituent, brackets); + } + for (Element root : roots) { + matchLocantsToIndirectFeatures(root); + } for (Element subOrRoot : substituentsAndRoot) { - matchLocantsToIndirectFeatures(subOrRoot); assignImplicitLocantsToDiTerminalSuffixes(subOrRoot); processConjunctiveNomenclature(subOrRoot); - resolveSuffixes(subOrRoot.getFirstChildElement(GROUP_EL), XOMTools.getChildElementsWithTagName(subOrRoot, SUFFIX_EL)); + suffixApplier.resolveSuffixes(subOrRoot.getFirstChildElement(GROUP_EL), subOrRoot.getChildElements(SUFFIX_EL)); } moveErroneouslyPositionedLocantsAndMultipliers(brackets);//e.g. (tetramethyl)azanium == tetra(methyl)azanium - List children = XOMTools.getChildElementsWithTagNames(word, new String[]{ROOT_EL, SUBSTITUENT_EL, BRACKET_EL}); - while (children.size()==1){ - children = XOMTools.getChildElementsWithTagNames(children.get(0), new String[]{ROOT_EL, SUBSTITUENT_EL, BRACKET_EL}); + List children = OpsinTools.getChildElementsWithTagNames(word, new String[]{ROOT_EL, SUBSTITUENT_EL, BRACKET_EL}); + addImplicitBracketsWhenFirstSubstituentHasTwoMultipliers(children.get(0), brackets);//e.g. ditrifluoroacetic acid --> di(trifluoroacetic acid) + while (children.size() == 1) { + children = OpsinTools.getChildElementsWithTagNames(children.get(0), new String[]{ROOT_EL, SUBSTITUENT_EL, BRACKET_EL}); } - if (children.size()>0){ - assignLocantsToMultipliedRootIfPresent(children.get(children.size()-1));//multiplicative nomenclature e.g. 
methylenedibenzene or 3,4'-oxydipyridine + if (children.size() > 0) { + assignLocantsToMultipliedRootIfPresent(children.get(children.size() - 1));//multiplicative nomenclature e.g. methylenedibenzene or 3,4'-oxydipyridine } - addImplicitBracketsInCaseWhereSubstituentHasTwoLocants(substituents, brackets); substituentsAndRootAndBrackets =OpsinTools.combineElementLists(substituentsAndRoot, brackets);//implicit brackets may have been created for (Element subBracketOrRoot : substituentsAndRootAndBrackets) { assignLocantsAndMultipliers(subBracketOrRoot); } processBiochemicalLinkageDescriptors(substituents, brackets); - processWordLevelMultiplierIfApplicable(word, wordCount); + processWordLevelMultiplierIfApplicable(word, roots, wordCount); } new WordRulesOmittedSpaceCorrector(state, parse).correctOmittedSpaces();//TODO where should this go? } @@ -219,21 +234,10 @@ * @throws ComponentGenerationException */ static Fragment resolveGroup(BuildState state, Element group) throws StructureBuildingException, ComponentGenerationException { - String groupType = group.getAttributeValue(TYPE_ATR); - String groupSubType = group.getAttributeValue(SUBTYPE_ATR); String groupValue = group.getAttributeValue(VALUE_ATR); - String groupValType = group.getAttributeValue(VALTYPE_ATR); - Fragment thisFrag =null; - if(groupValType.equals(SMILES_VALTYPE_VAL)) { - if (group.getAttribute(LABELS_ATR)!=null){ - thisFrag = state.fragManager.buildSMILES(groupValue, groupType, groupSubType, group.getAttributeValue(LABELS_ATR)); - } - else{ - thisFrag = state.fragManager.buildSMILES(groupValue, groupType, groupSubType, ""); - } - } else{ - throw new StructureBuildingException("Group tag has bad or missing valType: " + group.toXML()); - } + String labelsValue = group.getAttributeValue(LABELS_ATR); + Fragment thisFrag = state.fragManager.buildSMILES(groupValue, group, labelsValue != null ? 
labelsValue : NONE_LABELS_VAL); + group.setFrag(thisFrag); //processes groups like cymene and xylene whose structure is determined by the presence of a locant in front e.g. p-xylene processXyleneLikeNomenclature(state, group, thisFrag); @@ -241,8 +245,57 @@ setFragmentDefaultInAtomIfSpecified(thisFrag, group); setFragmentFunctionalAtomsIfSpecified(group, thisFrag); applyTraditionalAlkaneNumberingIfAppropriate(group, thisFrag); + applyHomologyGroupLabelsIfSpecified(group, thisFrag); return thisFrag; } + + private enum AtomReferenceType { + ID, + DEFAULTID, + LOCANT, + DEFAULTLOCANT + } + + private static class AtomReference { + private final AtomReferenceType referenceType; + private final String reference; + + AtomReference(AtomReferenceType referenceType, String reference) { + this.referenceType = referenceType; + this.reference = reference; + } + } + + private static class AddGroup { + private final Fragment frag; + private AtomReference atomReference; + + AddGroup(Fragment frag, AtomReference atomReference) { + this.frag = frag; + this.atomReference = atomReference; + } + } + + private static class AddHeteroatom { + private final String heteroAtomSmiles; + private AtomReference atomReference; + + AddHeteroatom(String heteroAtomSmiles, AtomReference atomReference) { + this.heteroAtomSmiles = heteroAtomSmiles; + this.atomReference = atomReference; + } + } + + private static class AddBond { + private final int bondOrder; + private AtomReference atomReference; + + AddBond(int bondOrder, AtomReference atomReference) { + this.bondOrder = bondOrder; + this.atomReference = atomReference; + } + } + /** * Checks for groups with the addGroup/addBond/addHeteroAtom attributes. 
For the addGroup attribute adds the group defined by the SMILES described within @@ -255,63 +308,64 @@ * @throws ComponentGenerationException */ private static void processXyleneLikeNomenclature(BuildState state, Element group, Fragment parentFrag) throws StructureBuildingException, ComponentGenerationException { - if(group.getAttribute(ADDGROUP_ATR)!=null) { - String addGroupInformation=group.getAttributeValue(ADDGROUP_ATR); - String[] groupsToBeAdded = MATCH_SEMICOLON.split(addGroupInformation);//typically only one, but 2 in the case of xylene and quinones - ArrayList> allGroupInformation = new ArrayList>(); - for (String groupToBeAdded : groupsToBeAdded) {//populate allGroupInformation list - String[] tempArray = MATCH_SPACE.split(groupToBeAdded); - HashMap groupInformation = new HashMap(); - if (tempArray.length != 2 && tempArray.length != 3) { - throw new ComponentGenerationException("malformed addGroup tag"); - } - groupInformation.put("SMILES", tempArray[0]); - if (tempArray[1].startsWith("id")) { - groupInformation.put("atomReferenceType", "id"); - groupInformation.put("atomReference", tempArray[1].substring(2)); - } else if (tempArray[1].startsWith("locant")) { - groupInformation.put("atomReferenceType", "locant"); - groupInformation.put("atomReference", tempArray[1].substring(6)); - } else { - throw new ComponentGenerationException("malformed addGroup tag"); - } - if (tempArray.length == 3) {//labels may optionally be specified for the group to be added - groupInformation.put("labels", tempArray[2]); - } - allGroupInformation.add(groupInformation); - } - Element previousEl =(Element) XOMTools.getPreviousSibling(group); - if (previousEl !=null && previousEl.getLocalName().equals(LOCANT_EL)){//has the name got specified locants to override the default ones - List locantValues =StringTools.arrayToList(MATCH_COMMA.split(previousEl.getValue())); - if ((locantValues.size()==groupsToBeAdded.length || locantValues.size() +1 ==groupsToBeAdded.length) && 
locantAreAcceptableForXyleneLikeNomenclatures(locantValues, group)){//one locant can be implicit in some cases - boolean assignlocants =true; - if (locantValues.size()!=groupsToBeAdded.length){ + boolean ambiguous = false; + + if(group.getAttribute(ADDGROUP_ATR) != null) { + String addGroupInformation = group.getAttributeValue(ADDGROUP_ATR); + List groupsToBeAdded = new ArrayList(); + ////typically only one, but 2 in the case of xylene and quinones + for (String groupToBeAdded : addGroupInformation.split(";")) { + String[] description = groupToBeAdded.split(" "); + if (description.length < 3 || description.length > 4) { + throw new ComponentGenerationException("malformed addGroup tag"); + } + String smiles = description[0]; + AtomReferenceType referenceType = AtomReferenceType.valueOf(description[1].toUpperCase(Locale.ROOT)); + String reference = description[2]; + Fragment fragToAdd; + if (description.length == 4) {//labels may optionally be specified for the group to be added + fragToAdd = state.fragManager.buildSMILES(smiles, group, description[3]); + } + else{ + fragToAdd = state.fragManager.buildSMILES(smiles, group, NONE_LABELS_VAL); + } + groupsToBeAdded.add(new AddGroup(fragToAdd, new AtomReference(referenceType, reference))); + } + Element previousEl = OpsinTools.getPreviousSibling(group); + if (previousEl !=null && previousEl.getName().equals(LOCANT_EL)){//has the name got specified locants to override the default ones + List locantValues = StringTools.arrayToList(previousEl.getValue().split(",")); + if ((locantValues.size() == groupsToBeAdded.size() || locantValues.size() + 1 == groupsToBeAdded.size()) && + locantAreAcceptableForXyleneLikeNomenclatures(locantValues, group)){//one locant can be implicit in some cases + boolean assignlocants = true; + if (locantValues.size() != groupsToBeAdded.size()){ //check that the firstGroup by default will be added to the atom with locant 1. 
If this is not the case then as many locants as there were groups should of been specified //or no locants should have been specified, which is what will be assumed (i.e. the locants will be left unassigned) - HashMap groupInformation =allGroupInformation.get(0); + AddGroup groupInformation = groupsToBeAdded.get(0); String locant; - if (groupInformation.get("atomReferenceType").equals("locant")){ - locant =parentFrag.getAtomByLocantOrThrow(groupInformation.get("atomReference")).getFirstLocant(); - } - else if (groupInformation.get("atomReferenceType").equals("id") ){ - locant =parentFrag.getAtomByIDOrThrow(parentFrag.getIdOfFirstAtom() + Integer.parseInt(groupInformation.get("atomReference")) -1 ).getFirstLocant(); - } - else{ + switch (groupInformation.atomReference.referenceType) { + case DEFAULTLOCANT: + case LOCANT: + locant = parentFrag.getAtomByLocantOrThrow(groupInformation.atomReference.reference).getFirstLocant(); + break; + case DEFAULTID: + case ID: + locant = parentFrag.getAtomByIDOrThrow(parentFrag.getIdOfFirstAtom() + Integer.parseInt(groupInformation.atomReference.reference) - 1).getFirstLocant(); + break; + default: throw new ComponentGenerationException("malformed addGroup tag"); } - if (locant ==null || !locant.equals("1")){ - assignlocants=false; + if (locant == null || !locant.equals("1")){ + assignlocants = false; } } if (assignlocants){ - for (int i = groupsToBeAdded.length -1; i >=0 ; i--) { + for (int i = groupsToBeAdded.size() - 1; i >=0 ; i--) { //if less locants than expected are specified the locants of only the later groups will be changed //e.g. 
4-xylene will transform 1,2-xylene to 1,4-xylene - HashMap groupInformation =allGroupInformation.get(i); + AddGroup groupInformation = groupsToBeAdded.get(i); if (locantValues.size() >0){ - groupInformation.put("atomReferenceType", "locant"); - groupInformation.put("atomReference", locantValues.get(locantValues.size()-1)); - locantValues.remove(locantValues.size()-1); + groupInformation.atomReference = new AtomReference(AtomReferenceType.LOCANT, locantValues.get(locantValues.size() - 1)); + locantValues.remove(locantValues.size() - 1); } else{ break; @@ -323,27 +377,29 @@ } } - for (int i = 0; i < groupsToBeAdded.length; i++) { - HashMap groupInformation =allGroupInformation.get(i); - String smilesOfGroupToBeAdded = groupInformation.get("SMILES"); - Fragment newFrag; - if (groupInformation.get("labels")!=null){ - newFrag = state.fragManager.buildSMILES(smilesOfGroupToBeAdded, parentFrag.getType(), parentFrag.getSubType(), groupInformation.get("labels")); - } - else{ - newFrag = state.fragManager.buildSMILES(smilesOfGroupToBeAdded, parentFrag.getType(), parentFrag.getSubType(), NONE_LABELS_VAL); - } - - Atom atomOnParentFrag =null; - if (groupInformation.get("atomReferenceType").equals("locant")){ - atomOnParentFrag=parentFrag.getAtomByLocantOrThrow(groupInformation.get("atomReference")); - } - else if (groupInformation.get("atomReferenceType").equals("id") ){ - atomOnParentFrag= parentFrag.getAtomByIDOrThrow(parentFrag.getIdOfFirstAtom() + Integer.parseInt(groupInformation.get("atomReference")) -1); - } - else{ + for (int i = 0; i < groupsToBeAdded.size(); i++) { + AddGroup groupInformation = groupsToBeAdded.get(i); + Fragment newFrag = groupInformation.frag; + + Atom atomOnParentFrag; + switch (groupInformation.atomReference.referenceType) { + case DEFAULTLOCANT: + ambiguous = true; + case LOCANT: + if (groupInformation.atomReference.reference.equals("required")) { + throw new ComponentGenerationException(group.getValue() + " requires an allowed locant"); + } + 
atomOnParentFrag = parentFrag.getAtomByLocantOrThrow(groupInformation.atomReference.reference); + break; + case DEFAULTID: + ambiguous = true; + case ID: + atomOnParentFrag = parentFrag.getAtomByIDOrThrow(parentFrag.getIdOfFirstAtom() + Integer.parseInt(groupInformation.atomReference.reference) -1); + break; + default: throw new ComponentGenerationException("malformed addGroup tag"); } + if (newFrag.getOutAtomCount() >1){ throw new ComponentGenerationException("too many outAtoms on group to be added"); } @@ -353,97 +409,85 @@ state.fragManager.incorporateFragment(newFrag, newFragOutAtom.getAtom(), parentFrag, atomOnParentFrag, newFragOutAtom.getValency()); } else{ - Atom atomOnNewFrag = newFrag.getDefaultInAtom(); + Atom atomOnNewFrag = newFrag.getDefaultInAtomOrFirstAtom(); state.fragManager.incorporateFragment(newFrag, atomOnNewFrag, parentFrag, atomOnParentFrag, 1); } } } - if(group.getAttributeValue(ADDHETEROATOM_ATR)!=null) { - String addHeteroAtomInformation=group.getAttributeValue(ADDHETEROATOM_ATR); - String[] heteroAtomsToBeAdded = MATCH_SEMICOLON.split(addHeteroAtomInformation); - ArrayList> allHeteroAtomInformation = new ArrayList>(); - for (String heteroAtomToBeAdded : heteroAtomsToBeAdded) {//populate allHeteroAtomInformation list - String[] tempArray = MATCH_SPACE.split(heteroAtomToBeAdded); - HashMap heteroAtomInformation = new HashMap(); - if (tempArray.length != 2) { - throw new ComponentGenerationException("malformed addHeteroAtom tag"); - } - heteroAtomInformation.put("SMILES", tempArray[0]); - if (tempArray[1].startsWith("id")) { - heteroAtomInformation.put("atomReferenceType", "id"); - heteroAtomInformation.put("atomReference", tempArray[1].substring(2)); - } else if (tempArray[1].startsWith("locant")) { - heteroAtomInformation.put("atomReferenceType", "locant"); - heteroAtomInformation.put("atomReference", tempArray[1].substring(6)); - } else { - throw new ComponentGenerationException("malformed addHeteroAtom tag"); - } - 
allHeteroAtomInformation.add(heteroAtomInformation); - } - Element previousEl =(Element) XOMTools.getPreviousSibling(group); - if (previousEl !=null && previousEl.getLocalName().equals(LOCANT_EL)){//has the name got specified locants to override the default ones - List locantValues =StringTools.arrayToList(MATCH_COMMA.split(previousEl.getValue())); - if (locantValues.size() ==heteroAtomsToBeAdded.length && locantAreAcceptableForXyleneLikeNomenclatures(locantValues, group)){ - for (int i = heteroAtomsToBeAdded.length -1; i >=0 ; i--) {//all heteroatoms must have a locant or default locants will be used - HashMap groupInformation =allHeteroAtomInformation.get(i); - groupInformation.put("atomReferenceType", "locant"); - groupInformation.put("atomReference", locantValues.get(locantValues.size()-1)); - locantValues.remove(locantValues.size()-1); + if(group.getAttributeValue(ADDHETEROATOM_ATR) != null) { + String addHeteroAtomInformation = group.getAttributeValue(ADDHETEROATOM_ATR); + List heteroAtomsToBeAdded = new ArrayList(); + for (String heteroAtomToBeAdded : addHeteroAtomInformation.split(";")) { + String[] description = heteroAtomToBeAdded.split(" "); + if (description.length != 3) { + throw new ComponentGenerationException("malformed addHeteroAtom tag"); + } + String heteroAtomSmiles = description[0]; + AtomReferenceType referenceType = AtomReferenceType.valueOf(description[1].toUpperCase(Locale.ROOT)); + String reference = description[2]; + heteroAtomsToBeAdded.add(new AddHeteroatom(heteroAtomSmiles, new AtomReference(referenceType, reference))); + } + Element previousEl = OpsinTools.getPreviousSibling(group); + if (previousEl != null && previousEl.getName().equals(LOCANT_EL)){//has the name got specified locants to override the default ones + List locantValues =StringTools.arrayToList(previousEl.getValue().split(",")); + if (locantValues.size() == heteroAtomsToBeAdded.size() && locantAreAcceptableForXyleneLikeNomenclatures(locantValues, group)){ + for (int i = 
heteroAtomsToBeAdded.size() -1; i >=0 ; i--) {//all heteroatoms must have a locant or default locants will be used + AddHeteroatom groupInformation = heteroAtomsToBeAdded.get(i); + groupInformation.atomReference = new AtomReference(AtomReferenceType.LOCANT, locantValues.get(locantValues.size() - 1)); + locantValues.remove(locantValues.size() - 1); } group.removeAttribute(group.getAttribute(FRONTLOCANTSEXPECTED_ATR)); previousEl.detach(); } } - for (int i = 0; i < heteroAtomsToBeAdded.length; i++) { - HashMap heteroAtomInformation =allHeteroAtomInformation.get(i); - Atom atomOnParentFrag =null; - if (heteroAtomInformation.get("atomReferenceType").equals("locant")){ - atomOnParentFrag=parentFrag.getAtomByLocantOrThrow(heteroAtomInformation.get("atomReference")); - } - else if (heteroAtomInformation.get("atomReferenceType").equals("id") ){ - atomOnParentFrag= parentFrag.getAtomByIDOrThrow(parentFrag.getIdOfFirstAtom() + Integer.parseInt(heteroAtomInformation.get("atomReference")) -1); - } - else{ + for (int i = 0; i < heteroAtomsToBeAdded.size(); i++) { + AddHeteroatom heteroAtomInformation = heteroAtomsToBeAdded.get(i); + Atom atomOnParentFrag = null; + switch (heteroAtomInformation.atomReference.referenceType) { + case DEFAULTLOCANT: + ambiguous = true; + case LOCANT: + if (heteroAtomInformation.atomReference.reference.equals("required")) { + throw new ComponentGenerationException(group.getValue() + " requires an allowed locant"); + } + atomOnParentFrag = parentFrag.getAtomByLocantOrThrow(heteroAtomInformation.atomReference.reference); + break; + case DEFAULTID: + ambiguous = true; + case ID: + atomOnParentFrag = parentFrag.getAtomByIDOrThrow(parentFrag.getIdOfFirstAtom() + Integer.parseInt(heteroAtomInformation.atomReference.reference) - 1); + break; + default: throw new ComponentGenerationException("malformed addHeteroAtom tag"); } - state.fragManager.replaceAtomWithSmiles(atomOnParentFrag, heteroAtomInformation.get("SMILES")); + 
state.fragManager.replaceAtomWithSmiles(atomOnParentFrag, heteroAtomInformation.heteroAtomSmiles); } } - if(group.getAttributeValue(ADDBOND_ATR)!=null && !HANTZSCHWIDMAN_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))) {//HW add bond is handled later - String addBondInformation=group.getAttributeValue(ADDBOND_ATR); - String[] bondsToBeAdded = MATCH_SEMICOLON.split(addBondInformation); - ArrayList> allBondInformation = new ArrayList>(); - for (String bondToBeAdded : bondsToBeAdded) {//populate allBondInformation list - String[] tempArray = MATCH_SPACE.split(bondToBeAdded); - HashMap bondInformation = new HashMap(); - if (tempArray.length != 2) { - throw new ComponentGenerationException("malformed addBond tag"); - } - bondInformation.put("bondOrder", tempArray[0]); - if (tempArray[1].startsWith("id")) { - bondInformation.put("atomReferenceType", "id"); - bondInformation.put("atomReference", tempArray[1].substring(2)); - } else if (tempArray[1].startsWith("locant")) { - bondInformation.put("atomReferenceType", "locant"); - bondInformation.put("atomReference", tempArray[1].substring(6)); - } else { - throw new ComponentGenerationException("malformed addBond tag"); - } - allBondInformation.add(bondInformation); - } - boolean locanted = false; - Element previousEl =(Element) XOMTools.getPreviousSibling(group); - if (previousEl !=null && previousEl.getLocalName().equals(LOCANT_EL)){//has the name got specified locants to override the default ones - List locantValues =StringTools.arrayToList(MATCH_COMMA.split(previousEl.getValue())); - if (locantValues.size() ==bondsToBeAdded.length && locantAreAcceptableForXyleneLikeNomenclatures(locantValues, group)){ - for (int i = bondsToBeAdded.length -1; i >=0 ; i--) {//all bond order changes must have a locant or default locants will be used - HashMap bondInformation =allBondInformation.get(i); - bondInformation.put("atomReferenceType", "locant"); - bondInformation.put("atomReference", 
locantValues.get(locantValues.size()-1)); - locantValues.remove(locantValues.size()-1); + if(group.getAttributeValue(ADDBOND_ATR) != null && !HANTZSCHWIDMAN_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))) {//HW add bond is handled later + String addBondInformation = group.getAttributeValue(ADDBOND_ATR); + List bondsToBeAdded = new ArrayList(); + for (String bondToBeAdded : addBondInformation.split(";")) { + String[] description = bondToBeAdded.split(" "); + if (description.length != 3) { + throw new ComponentGenerationException("malformed addBond tag"); + } + int bondOrder = Integer.parseInt(description[0]); + AtomReferenceType referenceType = AtomReferenceType.valueOf(description[1].toUpperCase(Locale.ROOT)); + String reference = description[2]; + bondsToBeAdded.add(new AddBond(bondOrder, new AtomReference(referenceType, reference))); + } + boolean locanted = false; + Element previousEl = OpsinTools.getPreviousSibling(group); + if (previousEl != null && previousEl.getName().equals(LOCANT_EL)){//has the name got specified locants to override the default ones + List locantValues = StringTools.arrayToList(previousEl.getValue().split(",")); + if (locantValues.size() == bondsToBeAdded.size() && locantAreAcceptableForXyleneLikeNomenclatures(locantValues, group)){ + for (int i = bondsToBeAdded.size() -1; i >=0 ; i--) {//all bond order changes must have a locant or default locants will be used + AddBond bondInformation = bondsToBeAdded.get(i); + bondInformation.atomReference = new AtomReference(AtomReferenceType.LOCANT, locantValues.get(locantValues.size() - 1)); + locantValues.remove(locantValues.size() - 1); } group.removeAttribute(group.getAttribute(FRONTLOCANTSEXPECTED_ATR)); previousEl.detach(); @@ -451,22 +495,30 @@ } } - for (int i = 0; i < bondsToBeAdded.length; i++) { - HashMap bondInformation =allBondInformation.get(i); - Atom atomOnParentFrag =null; - if (bondInformation.get("atomReferenceType").equals("locant")){ - 
atomOnParentFrag=parentFrag.getAtomByLocantOrThrow(bondInformation.get("atomReference")); - } - else if (bondInformation.get("atomReferenceType").equals("id") ){ - atomOnParentFrag= parentFrag.getAtomByIDOrThrow(parentFrag.getIdOfFirstAtom() + Integer.parseInt(bondInformation.get("atomReference")) -1); - } - else{ + for (int i = 0; i < bondsToBeAdded.size(); i++) { + AddBond bondInformation = bondsToBeAdded.get(i); + Atom atomOnParentFrag; + switch (bondInformation.atomReference.referenceType) { + case DEFAULTLOCANT: + ambiguous = true; + case LOCANT: + if (bondInformation.atomReference.reference.equals("required")) { + throw new ComponentGenerationException(group.getValue() + " requires an allowed locant"); + } + atomOnParentFrag=parentFrag.getAtomByLocantOrThrow(bondInformation.atomReference.reference); + break; + case DEFAULTID: + ambiguous = true; + case ID: + atomOnParentFrag= parentFrag.getAtomByIDOrThrow(parentFrag.getIdOfFirstAtom() + Integer.parseInt(bondInformation.atomReference.reference) -1); + break; + default: throw new ComponentGenerationException("malformed addBond tag"); } - Bond b = FragmentTools.unsaturate(atomOnParentFrag, Integer.parseInt(bondInformation.get("bondOrder")) , parentFrag); - if (!locanted && b.getOrder() ==2 && - parentFrag.getAtomList().size()==5 && + Bond b = FragmentTools.unsaturate(atomOnParentFrag, bondInformation.bondOrder, parentFrag); + if (!locanted && b.getOrder() == 2 && + parentFrag.getAtomCount() == 5 && b.getFromAtom().getAtomIsInACycle() && b.getToAtom().getAtomIsInACycle()){ //special case just that substitution of groups like imidazoline may actually remove the double bond... 
@@ -476,6 +528,9 @@ } } } + if (ambiguous) { + state.addIsAmbiguous(group.getValue() +" describes multiple structures"); + } } /** @@ -485,10 +540,10 @@ * @return */ private static boolean locantAreAcceptableForXyleneLikeNomenclatures(List locantValues, Element group) { - if (group.getAttribute(FRONTLOCANTSEXPECTED_ATR)==null){ + if (group.getAttribute(FRONTLOCANTSEXPECTED_ATR) == null){ throw new IllegalArgumentException("Group must have frontLocantsExpected to implement xylene-like nomenclature"); } - List allowedLocants = Arrays.asList(MATCH_COMMA.split(group.getAttributeValue(FRONTLOCANTSEXPECTED_ATR))); + List allowedLocants = Arrays.asList(group.getAttributeValue(FRONTLOCANTSEXPECTED_ATR).split(",")); for (String locant : locantValues) { if (!allowedLocants.contains(locant)){ return false; @@ -499,50 +554,19 @@ /** - * Looks for the presence of DEFAULTINLOCANT_ATR and DEFAULTINID_ATR on the group and applies them to the fragment - * Also sets the default in atom for alkanes so that say methylethyl is prop-2-yl rather than propyl + * Looks for the presence of {@link XmlDeclarations#DEFAULTINLOCANT_ATR} and {@link XmlDeclarations#DEFAULTINID_ATR} on the group and applies them to the fragment * @param thisFrag * @param group * @throws StructureBuildingException */ private static void setFragmentDefaultInAtomIfSpecified(Fragment thisFrag, Element group) throws StructureBuildingException { - String groupSubType = group.getAttributeValue(SUBTYPE_ATR); - if (group.getAttribute(DEFAULTINLOCANT_ATR)!=null){//sets the atom at which substitution will occur to by default - thisFrag.setDefaultInAtom(thisFrag.getAtomByLocantOrThrow(group.getAttributeValue(DEFAULTINLOCANT_ATR))); + String defaultInLocant = group.getAttributeValue(DEFAULTINLOCANT_ATR); + String defaultInId = group.getAttributeValue(DEFAULTINID_ATR); + if (defaultInLocant != null){//sets the atom at which substitution will occur to by default + 
thisFrag.setDefaultInAtom(thisFrag.getAtomByLocantOrThrow(defaultInLocant)); } - else if (group.getAttribute(DEFAULTINID_ATR)!=null){ - thisFrag.setDefaultInAtom(thisFrag.getAtomByIDOrThrow(thisFrag.getIdOfFirstAtom() + Integer.parseInt(group.getAttributeValue(DEFAULTINID_ATR)) -1)); - } - else if ("yes".equals(group.getAttributeValue(USABLEASJOINER_ATR)) && group.getAttribute(SUFFIXAPPLIESTO_ATR)==null){//makes linkers by default attach end to end - int chainLength =thisFrag.getChainLength(); - if (chainLength >1){ - boolean connectEndToEndWithPreviousSub =true; - if (groupSubType.equals(ALKANESTEM_SUBTYPE_VAL)){//don't do this if the group is preceded by another alkaneStem e.g. methylethyl makes more sense as prop-2-yl rather than propyl - Element previousSubstituent =(Element) XOMTools.getPreviousSibling(group.getParent()); - if (previousSubstituent!=null){ - Elements groups = previousSubstituent.getChildElements(GROUP_EL); - if (groups.size()==1 && groups.get(0).getAttributeValue(SUBTYPE_ATR).equals(ALKANESTEM_SUBTYPE_VAL) && !groups.get(0).getAttributeValue(TYPE_ATR).equals(RING_TYPE_VAL)){ - connectEndToEndWithPreviousSub = false; - } - } - } - if (connectEndToEndWithPreviousSub){ - Element parent =(Element) group.getParent(); - while (parent.getLocalName().equals(BRACKET_EL)){ - parent = (Element) parent.getParent(); - } - if (parent.getLocalName().equals(ROOT_EL)){ - Element previous = (Element) XOMTools.getPrevious(group); - if (previous==null || !previous.getLocalName().equals(MULTIPLIER_EL)){ - connectEndToEndWithPreviousSub=false; - } - } - } - if (connectEndToEndWithPreviousSub){ - group.addAttribute(new Attribute(DEFAULTINID_ATR, Integer.toString(chainLength))); - thisFrag.setDefaultInAtom(thisFrag.getAtomByLocantOrThrow(Integer.toString(chainLength))); - } - } + else if (defaultInId != null){ + thisFrag.setDefaultInAtom(thisFrag.getAtomByIDOrThrow(thisFrag.getIdOfFirstAtom() + Integer.parseInt(defaultInId) - 1)); } } @@ -555,10 +579,10 @@ */ private 
static void setFragmentFunctionalAtomsIfSpecified(Element group, Fragment thisFrag) throws StructureBuildingException { if (group.getAttribute(FUNCTIONALIDS_ATR)!=null){ - String[] functionalIDs = MATCH_COMMA.split(group.getAttributeValue(FUNCTIONALIDS_ATR)); - for (String functionalID : functionalIDs) { - thisFrag.addFunctionalAtom(thisFrag.getAtomByIDOrThrow(thisFrag.getIdOfFirstAtom() + Integer.parseInt(functionalID) - 1)); - } + String[] functionalIDs = group.getAttributeValue(FUNCTIONALIDS_ATR).split(","); + for (String functionalID : functionalIDs) { + thisFrag.addFunctionalAtom(thisFrag.getAtomByIDOrThrow(thisFrag.getIdOfFirstAtom() + Integer.parseInt(functionalID) - 1)); + } } } @@ -568,36 +592,39 @@ if (groupType.equals(ACIDSTEM_TYPE_VAL)){ List atomList = thisFrag.getAtomList(); Atom startingAtom = thisFrag.getFirstAtom(); - if (group.getAttribute(SUFFIXAPPLIESTO_ATR)!=null){ + if (group.getAttribute(SUFFIXAPPLIESTO_ATR) != null){ String suffixAppliesTo = group.getAttributeValue(SUFFIXAPPLIESTO_ATR); - String suffixAppliesToArr[] = MATCH_COMMA.split(suffixAppliesTo); - if (suffixAppliesToArr.length!=1){ + String suffixAppliesToArr[] = suffixAppliesTo.split(","); + if (suffixAppliesToArr.length != 1){ return; } - startingAtom = atomList.get(Integer.parseInt(suffixAppliesToArr[0])-1); + startingAtom = atomList.get(Integer.parseInt(suffixAppliesToArr[0]) - 1); } List neighbours = startingAtom.getAtomNeighbours(); - int counter =-1; + int counter = -1; Atom previousAtom = startingAtom; - for (int i = neighbours.size()-1; i >=0; i--) {//only consider carbon atoms - if (!neighbours.get(i).getElement().equals("C")){ + for (int i = neighbours.size() - 1; i >=0; i--) {//only consider carbon atoms + if (neighbours.get(i).getElement() != ChemEl.C){ neighbours.remove(i); } } - while (neighbours.size()==1){ + while (neighbours.size() == 1){ counter++; - if (counter>5){ + if (counter > 5){ break; } Atom nextAtom = neighbours.get(0); if (nextAtom.getAtomIsInACycle()){ 
break; } - nextAtom.addLocant(traditionalAlkanePositionNames[counter]); + String traditionalLocant = traditionalAlkanePositionNames[counter]; + if (!nextAtom.hasLocant(traditionalLocant)){ + nextAtom.addLocant(traditionalLocant); + } neighbours = nextAtom.getAtomNeighbours(); neighbours.remove(previousAtom); for (int i = neighbours.size()-1; i >=0; i--) {//only consider carbon atoms - if (!neighbours.get(i).getElement().equals("C")){ + if (neighbours.get(i).getElement() != ChemEl.C){ neighbours.remove(i); } } @@ -606,26 +633,26 @@ } else if (groupType.equals(CHAIN_TYPE_VAL) && ALKANESTEM_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ List atomList = thisFrag.getAtomList(); - if (atomList.size()==1){ + if (atomList.size() == 1){ return; } - Element possibleSuffix = (Element) XOMTools.getNextSibling(group, SUFFIX_EL); - Boolean terminalSuffixWithNoSuffixPrefixPresent =false; - if (possibleSuffix!=null && TERMINAL_SUBTYPE_VAL.equals(possibleSuffix.getAttributeValue(SUBTYPE_ATR)) && possibleSuffix.getAttribute(SUFFIXPREFIX_ATR)==null){ - terminalSuffixWithNoSuffixPrefixPresent =true; + Element possibleSuffix = OpsinTools.getNextSibling(group, SUFFIX_EL); + Boolean terminalSuffixWithNoSuffixPrefixPresent = false; + if (possibleSuffix!=null && TERMINAL_SUBTYPE_VAL.equals(possibleSuffix.getAttributeValue(SUBTYPE_ATR)) && possibleSuffix.getAttribute(SUFFIXPREFIX_ATR) == null){ + terminalSuffixWithNoSuffixPrefixPresent = true; } for (Atom atom : atomList) { String firstLocant = atom.getFirstLocant(); - if (!atom.getAtomIsInACycle() && firstLocant!=null && firstLocant.length()==1 && Character.isDigit(firstLocant.charAt(0))){ + if (!atom.getAtomIsInACycle() && firstLocant != null && firstLocant.length() == 1 && Character.isDigit(firstLocant.charAt(0))){ int locantNumber = Integer.parseInt(firstLocant); if (terminalSuffixWithNoSuffixPrefixPresent){ - if (locantNumber>1 && locantNumber<=7){ - atom.addLocant(traditionalAlkanePositionNames[locantNumber-2]); + if 
(locantNumber > 1 && locantNumber <= 7){ + atom.addLocant(traditionalAlkanePositionNames[locantNumber - 2]); } } else{ - if (locantNumber>0 && locantNumber<=6){ - atom.addLocant(traditionalAlkanePositionNames[locantNumber-1]); + if (locantNumber > 0 && locantNumber <= 6){ + atom.addLocant(traditionalAlkanePositionNames[locantNumber - 1]); } } } @@ -633,14 +660,35 @@ } } + private static void applyHomologyGroupLabelsIfSpecified(Element group, Fragment frag) { + String homologyValsStr = group.getAttributeValue(HOMOLOGY_ATR); + if (homologyValsStr != null) { + String[] vals = homologyValsStr.split(";"); + + List homologyAtoms = new ArrayList(); + for (Atom a : frag.getAtomList()) { + if (a.getElement() == ChemEl.R) { + homologyAtoms.add(a); + } + } + int count = vals.length; + if (count != homologyAtoms.size()) { + throw new RuntimeException("OPSIN Bug: Number of homology atoms should match number of homology labels! for: " + group.getValue() ); + } + for (int i = 0; i < count; i++) { + homologyAtoms.get(i).setProperty(Atom.HOMOLOGY_GROUP, vals[i]); + } + } + } + private void processChargeAndOxidationNumberSpecification(Element group, Fragment frag) { - Element nextEl = (Element) XOMTools.getNextSibling(group); - if (nextEl!=null){ - if(nextEl.getLocalName().equals(CHARGESPECIFIER_EL)){ + Element nextEl = OpsinTools.getNextSibling(group); + if (nextEl != null){ + if(nextEl.getName().equals(CHARGESPECIFIER_EL)) { frag.getFirstAtom().setCharge(Integer.parseInt(nextEl.getAttributeValue(VALUE_ATR))); nextEl.detach(); } - if(nextEl.getLocalName().equals(OXIDATIONNUMBERSPECIFIER_EL)){ + if(nextEl.getName().equals(OXIDATIONNUMBERSPECIFIER_EL)) { frag.getFirstAtom().setProperty(Atom.OXIDATION_NUMBER, Integer.parseInt(nextEl.getAttributeValue(VALUE_ATR))); nextEl.detach(); } @@ -648,110 +696,106 @@ } /** - * Removes substituents which are just a hydro/perhydro element and moves their contents to be in front of the next in scope ring + * Removes a substituent is just 
hydro/perhydro elements and moves its contents to be in front of the next in scope ring * @param substituent - * @return true is the substituent was a hydro substituent and hence was removed + * @return true if the substituent was a hydro substituent and hence was removed * @throws ComponentGenerationException */ private boolean removeAndMoveToAppropriateGroupIfHydroSubstituent(Element substituent) throws ComponentGenerationException { - Elements hydroElements = substituent.getChildElements(HYDRO_EL); - if (hydroElements.size() > 0 && substituent.getChildElements(GROUP_EL).size()==0){ - Element hydroSubstituent = substituent; - if (hydroElements.size()!=1){ - throw new ComponentGenerationException("Unexpected number of hydro elements found in substituent"); - } - Element hydroElement = hydroElements.get(0); - String hydroValue = hydroElement.getValue(); - if (hydroValue.equals("hydro")){ - Element multiplier = (Element) XOMTools.getPreviousSibling(hydroElement); - if (multiplier == null || !multiplier.getLocalName().equals(MULTIPLIER_EL) ){ - throw new ComponentGenerationException("Multiplier expected but not found before hydro subsituent"); - } - if (Integer.parseInt(multiplier.getAttributeValue(VALUE_ATR)) %2 !=0){ - throw new ComponentGenerationException("Hydro can only be added in pairs but multiplier was odd: " + multiplier.getAttributeValue(VALUE_ATR)); - } - } - Element targetRing =null; - Node nextSubOrRootOrBracket = XOMTools.getNextSibling(hydroSubstituent); - //first check adjacent substituent/root. 
If the hydroelement has one locant or the ring is locantless then we can assume the hydro is acting as a nondetachable prefix - Element potentialRing =((Element)nextSubOrRootOrBracket).getFirstChildElement(GROUP_EL); - if (potentialRing!=null && containsCyclicAtoms(potentialRing)){ - Element possibleLocantInFrontOfHydro = XOMTools.getPreviousSiblingIgnoringCertainElements(hydroElement, new String[]{MULTIPLIER_EL}); - if (possibleLocantInFrontOfHydro !=null && possibleLocantInFrontOfHydro.getLocalName().equals(LOCANT_EL) && MATCH_COMMA.split(possibleLocantInFrontOfHydro.getValue()).length==1){ + List hydroElements = substituent.getChildElements(HYDRO_EL); + if (hydroElements.size() > 0) { + Element targetRing = null; + final Element adjacentSubOrRootOrBracket = OpsinTools.getNextSibling(substituent); + if (adjacentSubOrRootOrBracket == null) { + throw new ComponentGenerationException("Cannot find ring for hydro substituent to apply to"); + } + //first check adjacent substituent/root. If the hydro element has one locant or the ring is locantless then we can assume the hydro is acting as a nondetachable prefix + Element potentialRing = adjacentSubOrRootOrBracket.getFirstChildElement(GROUP_EL); + if (potentialRing != null && containsCyclicAtoms(potentialRing)) { + Element possibleLocantInFrontOfHydro = OpsinTools.getPreviousSibling(hydroElements.get(0)); + if (possibleLocantInFrontOfHydro != null && possibleLocantInFrontOfHydro.getName().equals(LOCANT_EL) && possibleLocantInFrontOfHydro.getValue().split(",").length == 1) { //e.g.4-decahydro-1-naphthalenyl - targetRing =potentialRing; + targetRing = potentialRing; } else{ - Element possibleLocantInFrontOfRing =(Element) XOMTools.getPreviousSibling(potentialRing, LOCANT_EL); - if (possibleLocantInFrontOfRing !=null){ - if (potentialRing.getAttribute(FRONTLOCANTSEXPECTED_ATR)!=null){//check whether the group was expecting a locant e.g. 
2-furyl + Element possibleLocantInFrontOfRing = OpsinTools.getPreviousSibling(potentialRing, LOCANT_EL); + if (possibleLocantInFrontOfRing != null) { + if (potentialRing.getAttribute(FRONTLOCANTSEXPECTED_ATR) != null) {//check whether the group was expecting a locant e.g. 2-furyl String locantValue = possibleLocantInFrontOfRing.getValue(); - String[] expectedLocants = MATCH_COMMA.split(potentialRing.getAttributeValue(FRONTLOCANTSEXPECTED_ATR)); + String[] expectedLocants = potentialRing.getAttributeValue(FRONTLOCANTSEXPECTED_ATR).split(","); for (String expectedLocant : expectedLocants) { if (locantValue.equals(expectedLocant)){ - targetRing =potentialRing; + targetRing = potentialRing; break; } } } - //check whether the group is a HW system e.g. 1,3-thiazole - if (potentialRing.getAttributeValue(SUBTYPE_ATR).equals(HANTZSCHWIDMAN_SUBTYPE_VAL)){ - String locantValue = possibleLocantInFrontOfRing.getValue(); - int locants = MATCH_COMMA.split(locantValue).length; - int heteroCount = 0; - Element currentElem = (Element) XOMTools.getNextSibling(possibleLocantInFrontOfRing); - while(!currentElem.equals(potentialRing)){ - if(currentElem.getLocalName().equals(HETEROATOM_EL)) { - heteroCount++; - } else if (currentElem.getLocalName().equals(MULTIPLIER_EL)){ - heteroCount += Integer.parseInt(currentElem.getAttributeValue(VALUE_ATR)) -1; - } - currentElem = (Element)XOMTools.getNextSibling(currentElem); - } - if (heteroCount==locants){//number of locants must match number - targetRing =potentialRing; - } - } //check whether the group is a benzofused ring e.g. 
1,4-benzodioxin if (FUSIONRING_SUBTYPE_VAL.equals(potentialRing.getAttributeValue(SUBTYPE_ATR)) && (potentialRing.getValue().equals("benzo")|| potentialRing.getValue().equals("benz")) && - !((Element)XOMTools.getNextSibling(potentialRing)).getLocalName().equals(FUSION_EL)){ - targetRing =potentialRing; + !OpsinTools.getNextSibling(potentialRing).getName().equals(FUSION_EL)){ + targetRing = potentialRing; } } else{ - targetRing =potentialRing; + targetRing = potentialRing; } } } //that didn't match so the hydro appears to be a detachable prefix. detachable prefixes attach in preference to the rightmost applicable group so search any remaining substituents/roots from right to left - if (targetRing ==null){ - Element nextSubOrRootOrBracketFromLast = (Element) hydroSubstituent.getParent().getChild(hydroSubstituent.getParent().getChildCount()-1);//the last sibling - while (!nextSubOrRootOrBracketFromLast.equals(hydroSubstituent)){ + if (targetRing == null) { + Element nextSubOrRootOrBracketFromLast = substituent.getParent().getChild(substituent.getParent().getChildCount() - 1);//the last sibling + while (!nextSubOrRootOrBracketFromLast.equals(substituent)){ potentialRing = nextSubOrRootOrBracketFromLast.getFirstChildElement(GROUP_EL); - if (potentialRing!=null && containsCyclicAtoms(potentialRing)){ - targetRing =potentialRing; + if (potentialRing != null && containsCyclicAtoms(potentialRing)){ + targetRing = potentialRing; break; } else{ - nextSubOrRootOrBracketFromLast = (Element) XOMTools.getPreviousSibling(nextSubOrRootOrBracketFromLast); + nextSubOrRootOrBracketFromLast = OpsinTools.getPreviousSibling(nextSubOrRootOrBracketFromLast); } } } - if (targetRing ==null){ + if (targetRing == null) { throw new ComponentGenerationException("Cannot find ring for hydro substituent to apply to"); } //move the children of the hydro substituent - Elements children =hydroSubstituent.getChildElements(); - for (int i = children.size()-1; i >=0 ; i--) { - Element child 
=children.get(i); - if (!child.getLocalName().equals(HYPHEN_EL)){ + List children = substituent.getChildElements(); + Element targetSubstituent = targetRing.getParent(); + if (targetSubstituent.equals(adjacentSubOrRootOrBracket)) { + for (int i = children.size()-1; i >=0 ; i--) { + Element child = children.get(i); + if (child.getName().equals(HYPHEN_EL)) { + continue; + } child.detach(); - targetRing.getParent().insertChild(child, 0); + targetSubstituent.insertChild(child, 0); + } + } + else { + boolean inDetachablePrefix = true; + for (int i = children.size()-1; i >=0 ; i--) { + Element child = children.get(i); + String elName = child.getName(); + if (elName.equals(HYPHEN_EL)) { + continue; + } + else if (inDetachablePrefix && elName.equals(HYDRO_EL)) { + child.detach(); + targetSubstituent.insertChild(child, 0); + } + else if (elName.equals(STEREOCHEMISTRY_EL)) { + inDetachablePrefix = false; + child.detach(); + adjacentSubOrRootOrBracket.insertChild(child, 0); + } + else { + throw new ComponentGenerationException("Unexpected term found before detachable hydro prefix: " + child.getValue() ); + } } } - hydroSubstituent.detach(); + substituent.detach(); return true; } return false; @@ -761,29 +805,28 @@ /** * Removes substituents which are just a subtractivePrefix element e.g. 
deoxy and moves their contents to be in front of the next in scope biochemical fragment (or failing that group) * @param substituent - * @return true is the substituent was a subtractivePrefix substituent and hence was removed + * @return true if the substituent was a subtractivePrefix substituent and hence was removed * @throws ComponentGenerationException */ static boolean removeAndMoveToAppropriateGroupIfSubtractivePrefix(Element substituent) throws ComponentGenerationException { - Elements subtractivePrefixes = substituent.getChildElements(SUBTRACTIVEPREFIX_EL); - if (subtractivePrefixes.size() > 0){ - if (subtractivePrefixes.size()!=1){ - throw new RuntimeException("Unexpected number of subtractive prefixes found in substituent"); - } - Element subtractivePrefix = subtractivePrefixes.get(0); - Element biochemicalGroup =null;//preferred - Element standardGroup =null; - Node nextSubOrRootOrBracket = XOMTools.getNextSibling(substituent); + List subtractivePrefixes = substituent.getChildElements(SUBTRACTIVEPREFIX_EL); + if (subtractivePrefixes.size() > 0) { + Element biochemicalGroup = null;//preferred + Element standardGroup = null; + final Element adjacentSubOrRootOrBracket = OpsinTools.getNextSibling(substituent); + Element nextSubOrRootOrBracket = adjacentSubOrRootOrBracket; if (nextSubOrRootOrBracket == null){ - throw new ComponentGenerationException("Unable to find group for: " + subtractivePrefix.getValue() +" to apply to!"); + throw new ComponentGenerationException("Unable to find group for: " + subtractivePrefixes.get(0).getValue() +" to apply to!"); } //prefer the nearest (unlocanted) biochemical group or the rightmost standard group - while (nextSubOrRootOrBracket != null){ - Element groupToConsider = ((Element) nextSubOrRootOrBracket).getFirstChildElement(GROUP_EL); - if (groupToConsider!=null){ - if (BIOCHEMICAL_SUBTYPE_VAL.equals(groupToConsider.getAttributeValue(SUBTYPE_ATR)) || 
groupToConsider.getAttributeValue(TYPE_ATR).equals(CARBOHYDRATE_TYPE_VAL)){ + while (nextSubOrRootOrBracket != null) { + Element groupToConsider = nextSubOrRootOrBracket.getFirstChildElement(GROUP_EL); + if (groupToConsider != null) { + String type = groupToConsider.getAttributeValue(TYPE_ATR); + String subType = groupToConsider.getAttributeValue(SUBTYPE_ATR); + if (BIOCHEMICAL_SUBTYPE_VAL.equals(subType) || CARBOHYDRATE_TYPE_VAL.equals(type) || AMINOACID_TYPE_VAL.equals(type)){ biochemicalGroup = groupToConsider; - if (XOMTools.getPreviousSiblingsOfType(biochemicalGroup, LOCANT_EL).size() == 0){ + if (OpsinTools.getPreviousSiblingsOfType(biochemicalGroup, LOCANT_EL).size() == 0) { break; } } @@ -791,33 +834,46 @@ standardGroup = groupToConsider; } } - nextSubOrRootOrBracket = (Element) XOMTools.getNextSibling(nextSubOrRootOrBracket); + nextSubOrRootOrBracket = OpsinTools.getNextSibling(nextSubOrRootOrBracket); } - Element targetGroup = biochemicalGroup!=null ? biochemicalGroup : standardGroup; - if (targetGroup == null){ - throw new ComponentGenerationException("Unable to find group for: " + subtractivePrefix.getValue() +" to apply to!"); - } - if (subtractivePrefix.getAttributeValue(TYPE_ATR).equals(ANHYDRO_TYPE_VAL)){ - Element locant = (Element) XOMTools.getPreviousSibling(subtractivePrefix); - if (locant == null || !locant.getLocalName().equals(LOCANT_EL)){ - throw new ComponentGenerationException("Two locants are required before an anhydro prefix"); - } - String locantStr = locant.getValue(); - if (MATCH_COMMA.split(locantStr).length != 2){ - throw new ComponentGenerationException("Two locants are required before an anhydro prefix, but found: "+ locantStr); - } - subtractivePrefix.addAttribute(new Attribute(LOCANT_ATR, locantStr)); - locant.detach(); + Element targetGroup = biochemicalGroup != null ? 
biochemicalGroup : standardGroup; + if (targetGroup == null) { + throw new ComponentGenerationException("Unable to find group for: " + subtractivePrefixes.get(0).getValue() +" to apply to!"); } //move the children of the subtractivePrefix substituent - Elements children =substituent.getChildElements(); - for (int i = children.size()-1; i >=0 ; i--) { - Element child =children.get(i); - if (!child.getLocalName().equals(HYPHEN_EL)){ - child.detach(); - targetGroup.getParent().insertChild(child, 0); + List children = substituent.getChildElements(); + Element targetSubstituent = targetGroup.getParent(); + if (targetSubstituent.equals(adjacentSubOrRootOrBracket)) { + for (int i = children.size()-1; i >=0 ; i--) { + Element child =children.get(i); + if (!child.getName().equals(HYPHEN_EL)){ + child.detach(); + targetSubstituent.insertChild(child, 0); + } + } + } + else { + boolean inDetachablePrefix = true; + for (int i = children.size()-1; i >=0 ; i--) { + Element child = children.get(i); + String elName = child.getName(); + if (elName.equals(HYPHEN_EL)) { + continue; + } + else if (inDetachablePrefix && elName.equals(SUBTRACTIVEPREFIX_EL)) { + child.detach(); + targetSubstituent.insertChild(child, 0); + } + else if (elName.equals(STEREOCHEMISTRY_EL)) { + inDetachablePrefix = false; + child.detach(); + adjacentSubOrRootOrBracket.insertChild(child, 0); + } + else { + throw new ComponentGenerationException("Unexpected term found before detachable substractive prefix: " + child.getValue() ); + } } } substituent.detach(); @@ -826,9 +882,87 @@ return false; } + /** + * Removes substituents which are just a fused ring element and moves their contents to be in front of the next in scope ring + * @param substituent + * @return true if the substituent was a ring bridge and hence was removed + * @throws ComponentGenerationException + */ + private boolean removeAndMoveToAppropriateGroupIfRingBridge(Element substituent) throws ComponentGenerationException { + List ringBridges = 
substituent.getChildElements(FUSEDRINGBRIDGE_EL); + if (ringBridges.size() > 0) { + final Element adjacentSubOrRootOrBracket = OpsinTools.getNextSibling(substituent); + Element nextSubOrRootOrBracket = adjacentSubOrRootOrBracket; + if (nextSubOrRootOrBracket == null){ + throw new ComponentGenerationException("Unable to find group for: " + ringBridges.get(0).getValue() +" to apply to!"); + } + Element targetGroup = null; + Element standardGroup = null; + //prefer the nearest (unlocanted) ring group or the rightmost standard group + while (nextSubOrRootOrBracket != null) { + Element groupToConsider = nextSubOrRootOrBracket.getFirstChildElement(GROUP_EL); + if (groupToConsider != null) { + if (containsCyclicAtoms(groupToConsider) && OpsinTools.getPreviousSiblingsOfType(groupToConsider, LOCANT_EL).size() == 0) { + targetGroup = groupToConsider; + break; + } + else { + standardGroup = groupToConsider; + } + } + nextSubOrRootOrBracket = OpsinTools.getNextSibling(nextSubOrRootOrBracket); + } + if (targetGroup == null) { + targetGroup = standardGroup; + } + if (targetGroup == null) { + throw new ComponentGenerationException("Unable to find group for: " + ringBridges.get(0).getValue() +" to apply to!"); + } + + //move the children of the fusedRingBridge substituent + List children = substituent.getChildElements(); + Element targetSubstituent = targetGroup.getParent(); + if (targetSubstituent.equals(adjacentSubOrRootOrBracket)) { + for (int i = children.size()-1; i >=0 ; i--) { + Element child =children.get(i); + if (!child.getName().equals(HYPHEN_EL)){ + child.detach(); + targetSubstituent.insertChild(child, 0); + } + } + } + else { + boolean inDetachablePrefix = true; + for (int i = children.size()-1; i >=0 ; i--) { + Element child = children.get(i); + String elName = child.getName(); + if (elName.equals(HYPHEN_EL)) { + continue; + } + else if (inDetachablePrefix && (elName.equals(FUSEDRINGBRIDGE_EL) || + elName.equals(COLONORSEMICOLONDELIMITEDLOCANT_EL) || + 
elName.equals(LOCANT_EL))) { + child.detach(); + targetSubstituent.insertChild(child, 0); + } + else if (elName.equals(STEREOCHEMISTRY_EL)) { + inDetachablePrefix = false; + child.detach(); + adjacentSubOrRootOrBracket.insertChild(child, 0); + } + else { + throw new ComponentGenerationException("Unexpected term found before detachable ring bridge: " + child.getValue() ); + } + } + } + substituent.detach(); + return true; + } + return false; + } private boolean containsCyclicAtoms(Element potentialRing) { - Fragment potentialRingFrag = state.xmlFragmentMap.get(potentialRing); + Fragment potentialRingFrag = potentialRing.getFrag(); List atomList = potentialRingFrag.getAtomList(); for (Atom atom : atomList) { if (atom.getAtomIsInACycle()){ @@ -848,24 +982,24 @@ * @throws StructureBuildingException */ private void determineLocantMeaning(Element subOrBracketOrRoot, Element finalSubOrRootInWord) throws StructureBuildingException, ComponentGenerationException { - List locants = XOMTools.getChildElementsWithTagName(subOrBracketOrRoot, LOCANT_EL); - Element group =subOrBracketOrRoot.getFirstChildElement(GROUP_EL);//will be null if element is a bracket + List locants = subOrBracketOrRoot.getChildElements(LOCANT_EL); + Element group = subOrBracketOrRoot.getFirstChildElement(GROUP_EL);//will be null if element is a bracket for (Element locant : locants) { - String[] locantValues = MATCH_COMMA.split(locant.getValue()); + String[] locantValues = locant.getValue().split(","); if(locantValues.length > 1) { - Element afterLocant = (Element)XOMTools.getNextSibling(locant); + Element afterLocant = OpsinTools.getNextSibling(locant); int structuralBracketDepth = 0; Element multiplierEl = null; - while (afterLocant !=null){ - String elName = afterLocant.getLocalName(); + while (afterLocant != null){ + String elName = afterLocant.getName(); if (elName.equals(STRUCTURALOPENBRACKET_EL)){ structuralBracketDepth++; } else if (elName.equals(STRUCTURALCLOSEBRACKET_EL)){ 
structuralBracketDepth--; } - if (structuralBracketDepth!=0){ - afterLocant = (Element)XOMTools.getNextSibling(afterLocant); + if (structuralBracketDepth != 0){ + afterLocant = OpsinTools.getNextSibling(afterLocant); continue; } if(elName.equals(LOCANT_EL)) { @@ -873,66 +1007,66 @@ } else if (elName.equals(MULTIPLIER_EL)){ if (locantValues.length == Integer.parseInt(afterLocant.getAttributeValue(VALUE_ATR))){ - if (afterLocant.equals(XOMTools.getNextSiblingIgnoringCertainElements(locant, new String[]{INDICATEDHYDROGEN_EL}))){ + if (afterLocant.equals(OpsinTools.getNextSiblingIgnoringCertainElements(locant, new String[]{INDICATEDHYDROGEN_EL}))){ //direct locant, typical case. An exception is made for indicated hydrogen e.g. 1,2,4-1H-triazole multiplierEl = afterLocant; break; } else{ - Element afterMultiplier = (Element) XOMTools.getNextSibling(afterLocant); - if (afterMultiplier!=null && (afterMultiplier.getLocalName().equals(SUFFIX_EL) || afterMultiplier.getLocalName().equals(INFIX_EL) - || afterMultiplier.getLocalName().equals(UNSATURATOR_EL) || afterMultiplier.getLocalName().equals(GROUP_EL))){ + Element afterMultiplier = OpsinTools.getNextSibling(afterLocant); + if (afterMultiplier!=null && (afterMultiplier.getName().equals(SUFFIX_EL) || afterMultiplier.getName().equals(INFIX_EL) + || afterMultiplier.getName().equals(UNSATURATOR_EL) || afterMultiplier.getName().equals(GROUP_EL))){ multiplierEl = afterLocant; //indirect locant break; } } } - if (afterLocant.equals(XOMTools.getNextSibling(locant))){//if nothing better can be found report this as a locant/multiplier mismatch + if (afterLocant.equals(OpsinTools.getNextSibling(locant))){//if nothing better can be found report this as a locant/multiplier mismatch multiplierEl = afterLocant; } } - else if (elName.equals(RINGASSEMBLYMULTIPLIER_EL)&& afterLocant.equals(XOMTools.getNextSibling(locant))){//e.g. 
1,1'-biphenyl + else if (elName.equals(RINGASSEMBLYMULTIPLIER_EL) && afterLocant.equals(OpsinTools.getNextSibling(locant))){//e.g. 1,1'-biphenyl multiplierEl = afterLocant; - if (!FragmentTools.allAtomsInRingAreIdentical(state.xmlFragmentMap.get(group))){//if all atoms are identical then the locant may refer to suffixes + if (!FragmentTools.allAtomsInRingAreIdentical(group.getFrag())){//if all atoms are identical then the locant may refer to suffixes break; } } - else if (elName.equals(FUSEDRINGBRIDGE_EL)&& locantValues.length ==2 && afterLocant.equals(XOMTools.getNextSibling(locant))){//e.g. 1,8-methano + else if (elName.equals(FUSEDRINGBRIDGE_EL)&& locantValues.length ==2 && afterLocant.equals(OpsinTools.getNextSibling(locant))){//e.g. 1,8-methano break; } - afterLocant = (Element)XOMTools.getNextSibling(afterLocant); + afterLocant = OpsinTools.getNextSibling(afterLocant); } if(multiplierEl != null) { if(Integer.parseInt(multiplierEl.getAttributeValue(VALUE_ATR)) == locantValues.length ) { // number of locants and multiplier agree - boolean locantModified =false;//did determineLocantMeaning do something? + boolean locantModified = false;//did determineLocantMeaning do something? 
if (locantValues[locantValues.length-1].endsWith("'") && group!=null && subOrBracketOrRoot.indexOf(group) > subOrBracketOrRoot.indexOf(locant)){//quite possible that this is referring to a multiplied root - if (group.getAttribute(OUTIDS_ATR)!=null && MATCH_COMMA.split(group.getAttributeValue(OUTIDS_ATR)).length>1){ + if (group.getAttribute(OUTIDS_ATR)!=null && group.getAttributeValue(OUTIDS_ATR).split(",").length>1){ locantModified = checkSpecialLocantUses(locant, locantValues, finalSubOrRootInWord); } else{ - Element afterGroup = (Element)XOMTools.getNextSibling(group); + Element afterGroup = OpsinTools.getNextSibling(group); int inlineSuffixCount =0; - int multiplier=1; - while (afterGroup !=null){ - if(afterGroup.getLocalName().equals(MULTIPLIER_EL)){ + int multiplier = 1; + while (afterGroup != null){ + if(afterGroup.getName().equals(MULTIPLIER_EL)){ multiplier =Integer.parseInt(afterGroup.getAttributeValue(VALUE_ATR)); } - else if(afterGroup.getLocalName().equals(SUFFIX_EL) && afterGroup.getAttributeValue(TYPE_ATR).equals(INLINE_TYPE_VAL)){ + else if(afterGroup.getName().equals(SUFFIX_EL) && afterGroup.getAttributeValue(TYPE_ATR).equals(INLINE_TYPE_VAL)){ inlineSuffixCount +=(multiplier); multiplier=1; } - afterGroup = (Element)XOMTools.getNextSibling(afterGroup); + afterGroup = OpsinTools.getNextSibling(afterGroup); } if (inlineSuffixCount >=2){ locantModified = checkSpecialLocantUses(locant, locantValues, finalSubOrRootInWord); } } } - if (!locantModified && !XOMTools.getNextSibling(locant).equals(multiplierEl)){//the locants apply indirectly the multiplier e.g. 2,3-butandiol + if (!locantModified && !OpsinTools.getNextSibling(locant).equals(multiplierEl)){//the locants apply indirectly the multiplier e.g. 2,3-butandiol //move the locant to be next to the multiplier. 
locant.detach(); - XOMTools.insertBefore(multiplierEl, locant); + OpsinTools.insertBefore(multiplierEl, locant); } } else { if(!checkSpecialLocantUses(locant, locantValues, finalSubOrRootInWord)) { @@ -959,26 +1093,26 @@ * @param locantValues The locant values; * @param finalSubOrRootInWord : used to check if a locant is referring to the root as in multiplicative nomenclatures) * @return true if there's a HW system, and agreement; or if the locants conform to one of the alternative possibilities, otherwise false. - * @throws StructureBuildingException + * @throws ComponentGenerationException */ - private boolean checkSpecialLocantUses(Element locant, String[] locantValues, Element finalSubOrRootInWord) throws StructureBuildingException { - int count =locantValues.length; - Element currentElem = (Element)XOMTools.getNextSibling(locant); + private boolean checkSpecialLocantUses(Element locant, String[] locantValues, Element finalSubOrRootInWord) throws ComponentGenerationException { + int count = locantValues.length; + Element currentElem = OpsinTools.getNextSibling(locant); int heteroCount = 0; int multiplierValue = 1; - while(currentElem != null && !currentElem.getLocalName().equals(GROUP_EL)){ - if(currentElem.getLocalName().equals(HETEROATOM_EL)) { - heteroCount+=multiplierValue; - multiplierValue =1; - } else if (currentElem.getLocalName().equals(MULTIPLIER_EL)){ + while(currentElem != null && !currentElem.getName().equals(GROUP_EL)){ + if(currentElem.getName().equals(HETEROATOM_EL)) { + heteroCount += multiplierValue; + multiplierValue = 1; + } else if (currentElem.getName().equals(MULTIPLIER_EL)){ multiplierValue = Integer.parseInt(currentElem.getAttributeValue(VALUE_ATR)); } else{ break; } - currentElem = (Element)XOMTools.getNextSibling(currentElem); + currentElem = OpsinTools.getNextSibling(currentElem); } - if(currentElem != null && currentElem.getLocalName().equals(GROUP_EL)){ + if(currentElem != null && currentElem.getName().equals(GROUP_EL)){ if 
(currentElem.getAttributeValue(SUBTYPE_ATR).equals(HANTZSCHWIDMAN_SUBTYPE_VAL)) { if(heteroCount == count) { return true; @@ -986,19 +1120,19 @@ return false;//there is a case where locants don't apply to heteroatoms in a HW system, but in that case only one locant is expected so this function would not be called } } - if (heteroCount==0 && currentElem.getAttribute(OUTIDS_ATR)!=null ) {//e.g. 1,4-phenylene - String[] outIDs = MATCH_COMMA.split(currentElem.getAttributeValue(OUTIDS_ATR), -1); - Fragment groupFragment =state.xmlFragmentMap.get(currentElem); - if (count ==outIDs.length && groupFragment.getAtomList().size()>1){//things like oxy do not need to have their outIDs specified + if (heteroCount == 0 && currentElem.getAttribute(OUTIDS_ATR) != null ) {//e.g. 1,4-phenylene + String[] outIDs = currentElem.getAttributeValue(OUTIDS_ATR).split(",", -1); + Fragment groupFragment = currentElem.getFrag(); + if (count ==outIDs.length && groupFragment.getAtomCount() > 1){//things like oxy do not need to have their outIDs specified int idOfFirstAtomInFrag =groupFragment.getIdOfFirstAtom(); boolean foundLocantNotPresentOnFragment = false; - for (int i = outIDs.length-1; i >=0; i--) { + for (int i = outIDs.length - 1; i >=0; i--) { Atom a =groupFragment.getAtomByLocant(locantValues[i]); - if (a==null){ + if (a == null){ foundLocantNotPresentOnFragment = true; break; } - outIDs[i]=Integer.toString(a.getID() -idOfFirstAtomInFrag +1);//convert to relative id + outIDs[i] = Integer.toString(a.getID() - idOfFirstAtomInFrag + 1);//convert to relative id } if (!foundLocantNotPresentOnFragment){ currentElem.getAttribute(OUTIDS_ATR).setValue(StringTools.arrayToString(outIDs, ",")); @@ -1008,14 +1142,14 @@ } } else if(currentElem.getValue().equals("benz") || currentElem.getValue().equals("benzo")){ - Node potentialGroupAfterBenzo = XOMTools.getNextSibling(currentElem, GROUP_EL);//need to make sure this isn't benzyl + Element potentialGroupAfterBenzo = 
OpsinTools.getNextSibling(currentElem, GROUP_EL);//need to make sure this isn't benzyl if (potentialGroupAfterBenzo!=null){ return true;//e.g. 1,2-benzothiazole } } } if(currentElem != null) { - String name = currentElem.getLocalName(); + String name = currentElem.getName(); if (name.equals(POLYCYCLICSPIRO_EL)){ return true; } @@ -1027,23 +1161,37 @@ locant.detach(); return true; } + else if (name.equals(SUBTRACTIVEPREFIX_EL) && ANHYDRO_TYPE_VAL.equals(currentElem.getAttributeValue(TYPE_ATR))){ + if (count != 2) { + throw new ComponentGenerationException("Two locants are required before an anhydro prefix, but found: "+ locant.getValue()); + } + currentElem.addAttribute(new Attribute(LOCANT_ATR, locant.getValue())); + locant.detach(); + return true; + } } boolean detectedMultiplicativeNomenclature = detectMultiplicativeNomenclature(locant, locantValues, finalSubOrRootInWord); if (detectedMultiplicativeNomenclature){ return true; } - if (currentElem != null && count ==2 && currentElem.getLocalName().equals(GROUP_EL) && EPOXYLIKE_SUBTYPE_VAL.equals(currentElem.getAttributeValue(SUBTYPE_ATR))){ - return true; + if (currentElem != null && count ==2 && currentElem.getName().equals(GROUP_EL)){ + if (EPOXYLIKE_SUBTYPE_VAL.equals(currentElem.getAttributeValue(SUBTYPE_ATR))){ + return true; + } + if ("yes".equals(currentElem.getAttributeValue(IMINOLIKE_ATR))){ + currentElem.getAttribute(SUBTYPE_ATR).setValue(EPOXYLIKE_SUBTYPE_VAL); + return true; + } } - Element parentElem = (Element) locant.getParent(); - if (count==2 && parentElem.getLocalName().equals(BRACKET_EL)){//e.g. 3,4-(dichloromethylenedioxy) this is changed to (dichloro3,4-methylenedioxy) - List substituents = XOMTools.getChildElementsWithTagName(parentElem, SUBSTITUENT_EL); - if (substituents.size()>0){ - Element finalSub = substituents.get(substituents.size()-1); + Element parentElem = locant.getParent(); + if (count == 2 && parentElem.getName().equals(BRACKET_EL)){//e.g. 
3,4-(dichloromethylenedioxy) this is changed to (dichloro3,4-methylenedioxy) + List substituents = parentElem.getChildElements(SUBSTITUENT_EL); + if (substituents.size() > 0){ + Element finalSub = substituents.get(substituents.size() - 1); Element group = finalSub.getFirstChildElement(GROUP_EL); if (EPOXYLIKE_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ locant.detach(); - XOMTools.insertBefore(group, locant); + OpsinTools.insertBefore(group, locant); return true; } } @@ -1062,73 +1210,83 @@ */ private boolean detectMultiplicativeNomenclature(Element locant, String[] locantValues, Element finalSubOrRootInWord) { int count =locantValues.length; - Element multiplier =(Element) finalSubOrRootInWord.getChild(0); - if (((Element)finalSubOrRootInWord.getParent()).getLocalName().equals(BRACKET_EL)){//e.g. 1,1'-ethynediylbis(1-cyclopentanol) - if (!multiplier.getLocalName().equals(MULTIPLIER_EL)){ - multiplier =(Element) finalSubOrRootInWord.getParent().getChild(0); + Element multiplier = finalSubOrRootInWord.getChild(0); + if (finalSubOrRootInWord.getParent().getName().equals(BRACKET_EL)){//e.g. 1,1'-ethynediylbis(1-cyclopentanol) + if (!multiplier.getName().equals(MULTIPLIER_EL)){ + multiplier = finalSubOrRootInWord.getParent().getChild(0); } else{ - Element elAfterMultiplier = (Element) XOMTools.getNextSibling(multiplier); - String elName = elAfterMultiplier.getLocalName(); + Element elAfterMultiplier = OpsinTools.getNextSibling(multiplier); + String elName = elAfterMultiplier.getName(); if (elName.equals(HETEROATOM_EL) || elName.equals(SUBTRACTIVEPREFIX_EL)|| (elName.equals(HYDRO_EL) && !elAfterMultiplier.getValue().startsWith("per"))|| elName.equals(FUSEDRINGBRIDGE_EL)) { - multiplier =(Element) finalSubOrRootInWord.getParent().getChild(0); + multiplier = finalSubOrRootInWord.getParent().getChild(0); } } } - Node commonParent =locant.getParent().getParent();//this should be a common parent of the multiplier in front of the root. 
If it is not, then this locant is in a different scope - Node parentOfMultiplier =multiplier.getParent(); + Element commonParent = locant.getParent().getParent();//this should be a common parent of the multiplier in front of the root. If it is not, then this locant is in a different scope + Element parentOfMultiplier =multiplier.getParent(); while (parentOfMultiplier!=null){ if (commonParent.equals(parentOfMultiplier)){ if (locantValues[count-1].endsWith("'") && - multiplier.getLocalName().equals(MULTIPLIER_EL) && !((Element)XOMTools.getNextSibling(multiplier)).getLocalName().equals(MULTIPLICATIVELOCANT_EL) && + multiplier.getName().equals(MULTIPLIER_EL) && !OpsinTools.getNextSibling(multiplier).getName().equals(MULTIPLICATIVELOCANT_EL) && Integer.parseInt(multiplier.getAttributeValue(VALUE_ATR)) == count ){//multiplicative nomenclature - locant.setLocalName(MULTIPLICATIVELOCANT_EL); + locant.setName(MULTIPLICATIVELOCANT_EL); locant.detach(); - XOMTools.insertAfter(multiplier, locant); + OpsinTools.insertAfter(multiplier, locant); return true; } } - parentOfMultiplier=parentOfMultiplier.getParent(); + parentOfMultiplier = parentOfMultiplier.getParent(); } return false; } private void applyDLPrefixes(Element subOrRoot) throws ComponentGenerationException { - Elements dlStereochemistryEls = subOrRoot.getChildElements(DLSTEREOCHEMISTRY_EL); - for (int i = 0; i < dlStereochemistryEls.size(); i++) { - Element dlStereochemistry = dlStereochemistryEls.get(i); + List dlStereochemistryEls = OpsinTools.getChildElementsWithTagNameAndAttribute(subOrRoot, STEREOCHEMISTRY_EL, TYPE_ATR, DLSTEREOCHEMISTRY_TYPE_VAL); + for (Element dlStereochemistry : dlStereochemistryEls) { String dlStereochemistryValue = dlStereochemistry.getAttributeValue(VALUE_ATR); - Element elementToApplyTo = (Element) XOMTools.getNextSibling(dlStereochemistry); - if (elementToApplyTo ==null){ - throw new RuntimeException("OPSIN bug: DL stereochemistry found in inappropriate position"); + Element 
elementToApplyTo = OpsinTools.getNextSibling(dlStereochemistry); + if (elementToApplyTo == null){ + continue; + } + String type = elementToApplyTo.getAttributeValue(TYPE_ATR); + if (OPTICALROTATION_TYPE_VAL.equals(type)) { + elementToApplyTo = OpsinTools.getNextSibling(elementToApplyTo); + if (elementToApplyTo == null) { + continue; + } + type = elementToApplyTo.getAttributeValue(TYPE_ATR); } - if (AMINOACID_TYPE_VAL.equals(elementToApplyTo.getAttributeValue(TYPE_ATR))){ - applyDlStereochemistryToAminoAcid(elementToApplyTo, dlStereochemistryValue); + if (AMINOACID_TYPE_VAL.equals(type)) { + if (!applyDlStereochemistryToAminoAcid(elementToApplyTo, dlStereochemistryValue)){ + continue; + } } - else if (elementToApplyTo.getAttributeValue(TYPE_ATR).equals(CARBOHYDRATE_TYPE_VAL)){ + else if (CARBOHYDRATE_TYPE_VAL.equals(type)) { applyDlStereochemistryToCarbohydrate(elementToApplyTo, dlStereochemistryValue); } - else if (CARBOHYDRATECONFIGURATIONPREFIX_TYPE_VAL.equals(elementToApplyTo.getAttributeValue(TYPE_ATR))){ + else if (CARBOHYDRATECONFIGURATIONPREFIX_TYPE_VAL.equals(type)) { applyDlStereochemistryToCarbohydrateConfigurationalPrefix(elementToApplyTo, dlStereochemistryValue); } else{ - throw new RuntimeException("OPSIN bug: Unrecognised element after DL stereochemistry: " +elementToApplyTo.toXML()); + continue; } dlStereochemistry.detach(); } } - void applyDlStereochemistryToAminoAcid(Element aminoAcidEl, String dlStereochemistryValue) throws ComponentGenerationException { - Fragment aminoAcid = state.xmlFragmentMap.get(aminoAcidEl); + boolean applyDlStereochemistryToAminoAcid(Element aminoAcidEl, String dlStereochemistryValue) throws ComponentGenerationException { + Fragment aminoAcid = aminoAcidEl.getFrag(); List atomList = aminoAcid.getAtomList(); List atomsWithParities = new ArrayList(); for (Atom atom : atomList) { - if (atom.getAtomParity()!=null){ + if (atom.getAtomParity() != null) { atomsWithParities.add(atom); } } - if (atomsWithParities.isEmpty()){ - 
throw new ComponentGenerationException("D/L stereochemistry :" +dlStereochemistryValue + " found before achiral amino acid"); + if (atomsWithParities.isEmpty()) { + //achiral amino acid... but may become chiral after substitution + return false; } if (dlStereochemistryValue.equals("dl")){ for (Atom atom : atomsWithParities) { @@ -1142,7 +1300,7 @@ } else if (dlStereochemistryValue.equals("d") || dlStereochemistryValue.equals("ds")){ invert = true; } else{ - throw new ComponentGenerationException("Unexpected value for D/L stereochemistry found before amino acid: " + dlStereochemistryValue ); + throw new RuntimeException("OPSIN bug: Unexpected value for D/L stereochemistry found before amino acid: " + dlStereochemistryValue ); } if ("yes".equals(aminoAcidEl.getAttributeValue(NATURALENTISOPPOSITE_ATR))){ invert = !invert; @@ -1154,10 +1312,11 @@ } } } + return true; } void applyDlStereochemistryToCarbohydrate(Element carbohydrateEl, String dlStereochemistryValue) throws ComponentGenerationException { - Fragment carbohydrate = state.xmlFragmentMap.get(carbohydrateEl); + Fragment carbohydrate = carbohydrateEl.getFrag(); List atomList = carbohydrate.getAtomList(); List atomsWithParities = new ArrayList(); for (Atom atom : atomList) { @@ -1196,11 +1355,11 @@ } static void applyDlStereochemistryToCarbohydrateConfigurationalPrefix(Element elementToApplyTo, String dlStereochemistryValue) throws ComponentGenerationException { - if (dlStereochemistryValue.equals("d") || dlStereochemistryValue.equals("dg")){ + if (dlStereochemistryValue.equals("d") || dlStereochemistryValue.equals("dg")) { //do nothing, D- is implicit } - else if (dlStereochemistryValue.equals("l") || dlStereochemistryValue.equals("lg")){ - String[] values = MATCH_SLASH.split(elementToApplyTo.getAttributeValue(VALUE_ATR), -1); + else if (dlStereochemistryValue.equals("l") || dlStereochemistryValue.equals("lg")) { + String[] values = elementToApplyTo.getAttributeValue(VALUE_ATR).split("/", -1); StringBuilder sb 
= new StringBuilder(); for (String value : values) { if (value.equals("r")){ @@ -1217,8 +1376,8 @@ String newVal = sb.toString().substring(0, sb.length()-1); elementToApplyTo.getAttribute(VALUE_ATR).setValue(newVal); } - else if (dlStereochemistryValue.equals("dl")){ - String[] values = MATCH_SLASH.split(elementToApplyTo.getAttributeValue(VALUE_ATR)); + else if (dlStereochemistryValue.equals("dl")) { + String[] values = elementToApplyTo.getAttributeValue(VALUE_ATR).split("/"); String newVal = "?" + StringTools.multiplyString("/?", values.length-1); elementToApplyTo.getAttribute(VALUE_ATR).setValue(newVal); } @@ -1233,8 +1392,9 @@ * @throws StructureBuildingException */ private void processCarbohydrates(Element subOrRoot) throws StructureBuildingException { - List carbohydrates = XOMTools.getChildElementsWithTagNameAndAttribute(subOrRoot, GROUP_EL, TYPE_ATR, CARBOHYDRATE_TYPE_VAL); + List carbohydrates = OpsinTools.getChildElementsWithTagNameAndAttribute(subOrRoot, GROUP_EL, TYPE_ATR, CARBOHYDRATE_TYPE_VAL); for (Element carbohydrate : carbohydrates) { + Fragment carbohydrateFrag = carbohydrate.getFrag(); String subtype = carbohydrate.getAttributeValue(SUBTYPE_ATR); boolean isAldose; if (CARBOHYDRATESTEMKETOSE_SUBTYPE_VAL.equals(subtype)){ @@ -1244,11 +1404,16 @@ isAldose = true; } else{ + Attribute anomericId = carbohydrate.getAttribute(SUFFIXAPPLIESTO_ATR); + if (anomericId != null){ + Atom anomericCarbon = carbohydrateFrag.getAtomByID(carbohydrateFrag.getIdOfFirstAtom() + Integer.parseInt(anomericId.getValue()) -1); + applyAlphaBetaStereoToCyclisedCarbohydrate(carbohydrate, anomericCarbon); + carbohydrate.removeAttribute(anomericId); + } //trivial carbohydrates don't have suffixes continue; } boolean cyclisationPerformed = false; - Fragment carbohydrateFrag = state.xmlFragmentMap.get(carbohydrate); Attribute anomericId = carbohydrate.getAttribute(SUFFIXAPPLIESTO_ATR); if (anomericId == null){ throw new StructureBuildingException("OPSIN bug: Missing 
suffixAppliesTo on: " + carbohydrate.getValue()); @@ -1259,10 +1424,10 @@ } carbohydrate.removeAttribute(anomericId); - Element nextSibling = (Element) XOMTools.getNextSibling(carbohydrate); + Element nextSibling = OpsinTools.getNextSibling(carbohydrate); while (nextSibling !=null){ - Element nextNextSibling = (Element) XOMTools.getNextSibling(nextSibling); - String elName = nextSibling.getLocalName(); + Element nextNextSibling = OpsinTools.getNextSibling(nextSibling); + String elName = nextSibling.getName(); if (elName.equals(SUFFIX_EL)){ Element suffix = nextSibling; String value = suffix.getAttributeValue(VALUE_ATR); @@ -1285,7 +1450,6 @@ isAldose = false; if (SYSTEMATICCARBOHYDRATESTEMALDOSE_SUBTYPE_VAL.equals(subtype)){ carbohydrate.getAttribute(SUBTYPE_ATR).setValue(SYSTEMATICCARBOHYDRATESTEMKETOSE_SUBTYPE_VAL); - carbohydrateFrag.setSubType(SYSTEMATICCARBOHYDRATESTEMKETOSE_SUBTYPE_VAL); } } potentialCarbonyl = processUloseSuffix(carbohydrate, suffix, potentialCarbonyl); @@ -1294,7 +1458,7 @@ else if (value.equals("itol") || value.equals("yl") || value.equals("glycoside")){ suffix.addAttribute(new Attribute(LOCANT_ATR, potentialCarbonyl.getFirstLocant())); if (value.equals("glycoside") && OpsinTools.getParentWordRule(subOrRoot).getAttributeValue(WORDRULE_ATR).equals(WordRule.simple.toString())){ - throw new StructureBuildingException("A glycoside requires a space seperated substituent e.g. methyl alpha-D-glucopyranoside"); + throw new StructureBuildingException("A glycoside requires a space-separated substituent e.g. 
methyl alpha-D-glucopyranoside"); } } } @@ -1303,11 +1467,14 @@ throw new StructureBuildingException("OPSIN bug: Carbohydate cyclised twice!"); } Element ringSize = nextSibling; - cycliseCarbohydrate(carbohydrate, ringSize, potentialCarbonyl); + cycliseCarbohydrateAndApplyAlphaBetaStereo(carbohydrate, ringSize, potentialCarbonyl); ringSize.detach(); cyclisationPerformed = true; } - else if (!elName.equals(LOCANT_EL) && !elName.equals(MULTIPLIER_ATR) && !elName.equals(UNSATURATOR_EL)){ + else if (!elName.equals(LOCANT_EL) && + !elName.equals(MULTIPLIER_ATR) && + !elName.equals(UNSATURATOR_EL) && + !elName.equals(COLONORSEMICOLONDELIMITEDLOCANT_EL)){ break; } nextSibling = nextNextSibling; @@ -1320,13 +1487,23 @@ private void applyUnspecifiedRingSizeCyclisationIfPresent(Element group, Atom potentialCarbonyl) throws StructureBuildingException { boolean cyclise = false; - Element possibleYl = (Element) XOMTools.getNextSibling(group); - if (possibleYl != null && possibleYl.getLocalName().equals(SUFFIX_EL) && possibleYl.getValue().equals("yl")){ - cyclise = true; + Element possibleYl = OpsinTools.getNextSibling(group); + if (possibleYl != null && possibleYl.getName().equals(SUFFIX_EL)){ + if (possibleYl.getAttributeValue(VALUE_ATR).equals("yl")){ + cyclise = true; + } + else { + //(on|uron)osyl + possibleYl = OpsinTools.getNextSibling(possibleYl); + if (possibleYl != null && possibleYl.getName().equals(SUFFIX_EL) && + possibleYl.getAttributeValue(VALUE_ATR).equals("yl")) { + cyclise = true; + } + } } - else{ - Element alphaOrBetaLocantEl = (Element) XOMTools.getPreviousSiblingIgnoringCertainElements(group, new String[]{STEREOCHEMISTRY_EL}); - if (alphaOrBetaLocantEl != null && alphaOrBetaLocantEl.getLocalName().equals(LOCANT_EL) ){ + if (!cyclise) { + Element alphaOrBetaLocantEl = OpsinTools.getPreviousSiblingIgnoringCertainElements(group, new String[]{STEREOCHEMISTRY_EL}); + if (alphaOrBetaLocantEl != null && alphaOrBetaLocantEl.getName().equals(LOCANT_EL) ){ String 
value = alphaOrBetaLocantEl.getValue(); if (value.equals("alpha") || value.equals("beta") || value.equals("alpha,beta") || value.equals("beta,alpha")){ cyclise = true; @@ -1334,16 +1511,16 @@ } } if (cyclise) { - Element ringSize = new Element(CARBOHYDRATERINGSIZE_EL); + Element ringSize = new TokenEl(CARBOHYDRATERINGSIZE_EL); String sugarStem = group.getValue(); - if (state.xmlFragmentMap.get(group).hasLocant("5") && !sugarStem.equals("rib") && !sugarStem.equals("fruct")){ + if (group.getFrag().hasLocant("5") && !sugarStem.equals("rib") && !sugarStem.equals("fruct")){ ringSize.addAttribute(new Attribute(VALUE_ATR, "6")); } else{ ringSize.addAttribute(new Attribute(VALUE_ATR, "5")); } - XOMTools.insertAfter(group, ringSize); - cycliseCarbohydrate(group, ringSize, potentialCarbonyl); + OpsinTools.insertAfter(group, ringSize); + cycliseCarbohydrateAndApplyAlphaBetaStereo(group, ringSize, potentialCarbonyl); ringSize.detach(); } } @@ -1361,18 +1538,16 @@ */ private Atom processUloseSuffix(Element group, Element suffix, Atom potentialCarbonyl) throws StructureBuildingException { List locantsToConvertToKetones = new ArrayList(); - Element potentialLocantOrMultiplier = (Element) XOMTools.getPreviousSibling(suffix); - if (potentialLocantOrMultiplier.getLocalName().equals(MULTIPLIER_ATR)){ + Element potentialLocantOrMultiplier = OpsinTools.getPreviousSibling(suffix); + if (potentialLocantOrMultiplier.getName().equals(MULTIPLIER_ATR)){ int multVal = Integer.parseInt(potentialLocantOrMultiplier.getAttributeValue(VALUE_ATR)); - Element locant = (Element) XOMTools.getPreviousSibling(potentialLocantOrMultiplier); - if (locant != null && locant.getLocalName().equals(LOCANT_EL)){ - String[] locantStrs = MATCH_COMMA.split(locant.getValue()); + Element locant = OpsinTools.getPreviousSibling(potentialLocantOrMultiplier); + if (locant != null && locant.getName().equals(LOCANT_EL)){ + String[] locantStrs = locant.getValue().split(","); if (locantStrs.length != multVal) { throw new 
StructureBuildingException("Mismatch between locant and multiplier counts (" + locantStrs.length + " and " + multVal + "):" + locant.getValue()); } - for (String locantStr : locantStrs) { - locantsToConvertToKetones.add(locantStr); - } + Collections.addAll(locantsToConvertToKetones, locantStrs); locant.detach(); } else{ @@ -1384,12 +1559,12 @@ } else { Element locant = potentialLocantOrMultiplier; - if (!locant.getLocalName().equals(LOCANT_EL)){ - locant = (Element) XOMTools.getPreviousSibling(group); + if (!locant.getName().equals(LOCANT_EL)){ + locant = OpsinTools.getPreviousSibling(group); } - if (locant !=null && locant.getLocalName().equals(LOCANT_EL)){ + if (locant !=null && locant.getName().equals(LOCANT_EL)){ String locantStr = locant.getValue(); - if (MATCH_COMMA.split(locantStr).length==1){ + if (locantStr.split(",").length==1){ locantsToConvertToKetones.add(locantStr); } else{ @@ -1401,14 +1576,14 @@ locantsToConvertToKetones.add("2"); } } - Fragment frag = state.xmlFragmentMap.get(group); + Fragment frag = group.getFrag(); if (suffix.getAttributeValue(VALUE_ATR).equals("ulose")) {//convert aldose to ketose Atom aldehydeAtom = potentialCarbonyl; boolean foundBond = false; for (Bond bond : aldehydeAtom.getBonds()) { if (bond.getOrder() ==2){ Atom otherAtom = bond.getOtherAtom(aldehydeAtom); - if (otherAtom.getElement().equals("O") && otherAtom.getCharge()==0 && otherAtom.getBonds().size()==1){ + if (otherAtom.getElement() == ChemEl.O && otherAtom.getCharge()==0 && otherAtom.getBondCount()==1){ bond.setOrder(1); foundBond = true; break; @@ -1427,7 +1602,7 @@ for (Bond bond : backboneAtom.getBonds()) { if (bond.getOrder() ==1){ Atom otherAtom = bond.getOtherAtom(backboneAtom); - if (otherAtom.getElement().equals("O") && otherAtom.getCharge()==0 && otherAtom.getBonds().size()==1){ + if (otherAtom.getElement() == ChemEl.O && otherAtom.getCharge()==0 && otherAtom.getBondCount()==1){ bond.setOrder(2); foundBond = true; break; @@ -1450,14 +1625,14 @@ * @param 
potentialCarbonyl * @throws StructureBuildingException */ - private void cycliseCarbohydrate(Element carbohydrateGroup, Element ringSize, Atom potentialCarbonyl) throws StructureBuildingException { - Fragment frag = state.xmlFragmentMap.get(carbohydrateGroup); + private void cycliseCarbohydrateAndApplyAlphaBetaStereo(Element carbohydrateGroup, Element ringSize, Atom potentialCarbonyl) throws StructureBuildingException { + Fragment frag = carbohydrateGroup.getFrag(); String ringSizeVal = ringSize.getAttributeValue(VALUE_ATR); - Element potentialLocant = (Element) XOMTools.getPreviousSibling(ringSize); + Element potentialLocant = OpsinTools.getPreviousSibling(ringSize); Atom carbonylCarbon = null; Atom atomToJoinWith = null; - if (potentialLocant.getLocalName().equals(LOCANT_EL)){ - String[] locants = MATCH_COMMA.split(potentialLocant.getValue()); + if (potentialLocant.getName().equals(LOCANT_EL)){ + String[] locants = potentialLocant.getValue().split(","); if (locants.length != 2){ throw new StructureBuildingException("Expected 2 locants in front of sugar ring size specifier but found: " + potentialLocant.getValue()); } @@ -1505,9 +1680,14 @@ } state.fragManager.createBond(carbonylCarbon, atomToJoinWith, 1); CycleDetector.assignWhetherAtomsAreInCycles(frag); - Element alphaOrBetaLocantEl = (Element) XOMTools.getPreviousSiblingIgnoringCertainElements(carbohydrateGroup, new String[]{STEREOCHEMISTRY_EL}); - if (alphaOrBetaLocantEl !=null && alphaOrBetaLocantEl.getLocalName().equals(LOCANT_EL)){ - Element stereoPrefixAfterAlphaBeta = (Element) XOMTools.getNextSibling(alphaOrBetaLocantEl); + applyAlphaBetaStereoToCyclisedCarbohydrate(carbohydrateGroup, carbonylCarbon); + } + + private void applyAlphaBetaStereoToCyclisedCarbohydrate(Element carbohydrateGroup, Atom carbonylCarbon) { + Fragment frag = carbohydrateGroup.getFrag(); + Element alphaOrBetaLocantEl = OpsinTools.getPreviousSiblingIgnoringCertainElements(carbohydrateGroup, new String[]{STEREOCHEMISTRY_EL}); + if 
(alphaOrBetaLocantEl !=null && alphaOrBetaLocantEl.getName().equals(LOCANT_EL)){ + Element stereoPrefixAfterAlphaBeta = OpsinTools.getNextSibling(alphaOrBetaLocantEl); Atom anomericReferenceAtom = getAnomericReferenceAtom(frag); if (anomericReferenceAtom ==null){ throw new RuntimeException("OPSIN bug: Unable to determine anomeric reference atom in: " +carbohydrateGroup.getValue()); @@ -1529,15 +1709,15 @@ } private void processAldoseDiSuffix(String suffixValue, Element group, Atom aldehydeAtom) throws StructureBuildingException { - Fragment frag = state.xmlFragmentMap.get(group); + Fragment frag = group.getFrag(); Atom alcoholAtom = frag.getAtomByLocantOrThrow(String.valueOf(frag.getChainLength())); if (suffixValue.equals("aric acid") || suffixValue.equals("arate")){ - removeTerminalOxygen(alcoholAtom, 1); - Fragment f = state.fragManager.buildSMILES("O", frag.getType(), frag.getSubType(), NONE_LABELS_VAL); + FragmentTools.removeTerminalOxygen(state, alcoholAtom, 1); + Fragment f = state.fragManager.buildSMILES("O", group, NONE_LABELS_VAL); state.fragManager.incorporateFragment(f, f.getFirstAtom(), frag, alcoholAtom, 2); - f = state.fragManager.buildSMILES("O", frag.getType(), frag.getSubType(), NONE_LABELS_VAL); + f = state.fragManager.buildSMILES("O", group, NONE_LABELS_VAL); Atom hydroxyAtom = f.getFirstAtom(); if (suffixValue.equals("arate")){ hydroxyAtom.addChargeAndProtons(-1, -1); @@ -1545,7 +1725,7 @@ state.fragManager.incorporateFragment(f, f.getFirstAtom(), frag, alcoholAtom, 1); frag.addFunctionalAtom(hydroxyAtom); - f = state.fragManager.buildSMILES("O", frag.getType(), frag.getSubType(), NONE_LABELS_VAL); + f = state.fragManager.buildSMILES("O", group, NONE_LABELS_VAL); hydroxyAtom = f.getFirstAtom(); if (suffixValue.equals("arate")){ hydroxyAtom.addChargeAndProtons(-1, -1); @@ -1555,8 +1735,8 @@ } else if (suffixValue.equals("dialdose")){ - removeTerminalOxygen(alcoholAtom, 1); - Fragment f = state.fragManager.buildSMILES("O", frag.getType(), 
frag.getSubType(), NONE_LABELS_VAL); + FragmentTools.removeTerminalOxygen(state, alcoholAtom, 1); + Fragment f = state.fragManager.buildSMILES("O", group, NONE_LABELS_VAL); state.fragManager.incorporateFragment(f, f.getFirstAtom(), frag, alcoholAtom, 2); } else{ @@ -1579,14 +1759,17 @@ } try{ String locant = a.getFirstLocant(); - int intVal = Integer.parseInt(locant); - if (intVal > highestLocantfound){ - highestLocantfound = intVal; - configurationalAtom = a; + if (locant !=null) { + int intVal = Integer.parseInt(locant); + if (intVal > highestLocantfound){ + highestLocantfound = intVal; + configurationalAtom = a; + } } + } - catch (Exception e) { - //may throw null pointer exceptions or number format exceptions + catch (NumberFormatException e) { + //may throw number format exceptions } } return configurationalAtom; @@ -1630,10 +1813,10 @@ String nextLowestLocant = String.valueOf(Integer.parseInt(referenceAtom.getFirstLocant()) -1); Atom[] atomRefs4 = new Atom[4]; for (Atom neighbour : neighbours) { - if (neighbour.getElement().equals("O")){ + if (neighbour.getElement() == ChemEl.O) { atomRefs4[0] = neighbour; } - else if (neighbour.getElement().equals("C")){ + else if (neighbour.getElement() == ChemEl.C) { if (neighbour.getFirstLocant().equals(nextLowestLocant)){ atomRefs4[1] = neighbour; } @@ -1656,12 +1839,20 @@ private Atom[] getDeterministicAtomRefs4ForAnomericAtom(Atom anomericAtom) { List neighbours = anomericAtom.getAtomNeighbours(); - if (neighbours.size()!=3 && neighbours.size()!=4){ + Atom[] atomRefs4 = new Atom[4]; + if (neighbours.size() == 3 || neighbours.size() == 4 ){ + //normal case + } + else if (neighbours.size() == 2 && anomericAtom.getOutValency() == 1) { + //trivial glycosyl + atomRefs4[1] = AtomParity.deoxyHydrogen; + } + else { throw new RuntimeException("OPSIN bug: Unexpected number of atoms connected to anomeric atom of carbohydrate"); } - Atom[] atomRefs4 = new Atom[4]; + for (Atom neighbour : neighbours) { - if 
(neighbour.getElement().equals("C")){ + if (neighbour.getElement() == ChemEl.C){ if (neighbour.getAtomIsInACycle()){ atomRefs4[0] = neighbour; } @@ -1669,7 +1860,7 @@ atomRefs4[3] = neighbour; } } - else if (neighbour.getElement().equals("O")){ + else if (neighbour.getElement() == ChemEl.O){ int incomingVal =neighbour.getIncomingValency(); if (incomingVal ==1){ atomRefs4[1] = neighbour; @@ -1703,15 +1894,21 @@ * @param subOrRoot The substituent/root to looks for multipliers in. */ private void processMultipliers(Element subOrRoot) { - List multipliers = XOMTools.getChildElementsWithTagName(subOrRoot, MULTIPLIER_EL); + List multipliers = subOrRoot.getChildElements(MULTIPLIER_EL); for (Element multiplier : multipliers) { - Element possibleLocant =(Element)XOMTools.getPreviousSibling(multiplier); + Element possibleLocant = OpsinTools.getPreviousSibling(multiplier); String[] locants = null; - if (possibleLocant !=null && possibleLocant.getLocalName().equals(LOCANT_EL)){ - locants = MATCH_COMMA.split(possibleLocant.getValue()); + if (possibleLocant != null){ + String possibleLocantElName = possibleLocant.getName(); + if (possibleLocantElName.equals(LOCANT_EL)){ + locants = possibleLocant.getValue().split(","); + } + else if (possibleLocantElName.equals(COLONORSEMICOLONDELIMITEDLOCANT_EL)){ + locants = StringTools.removeDashIfPresent(possibleLocant.getValue()).split(":"); + } } - Element featureToMultiply = (Element)XOMTools.getNextSibling(multiplier); - String nextName = featureToMultiply.getLocalName(); + Element featureToMultiply = OpsinTools.getNextSibling(multiplier); + String nextName = featureToMultiply.getName(); if(nextName.equals(UNSATURATOR_EL) || nextName.equals(SUFFIX_EL) || nextName.equals(SUBTRACTIVEPREFIX_EL) || @@ -1722,11 +1919,11 @@ featureToMultiply.addAttribute(new Attribute(MULTIPLIED_ATR, "multiplied")); } for(int i= mvalue -1; i >=1; i--) { - Element newElement = new Element(featureToMultiply); + Element newElement = featureToMultiply.copy(); if 
(locants !=null && locants.length==mvalue){ newElement.addAttribute(new Attribute(LOCANT_ATR, locants[i])); } - XOMTools.insertAfter(featureToMultiply, newElement); + OpsinTools.insertAfter(featureToMultiply, newElement); } multiplier.detach(); if (locants !=null && locants.length==mvalue){ @@ -1747,8 +1944,8 @@ * @throws StructureBuildingException */ private void detectConjunctiveSuffixGroups(Element subOrRoot, List allGroups) throws ComponentGenerationException, StructureBuildingException { - List groups = XOMTools.getChildElementsWithTagName(subOrRoot, GROUP_EL); - if (groups.size()>1){ + List groups = subOrRoot.getChildElements(GROUP_EL); + if (groups.size() > 1) { List conjunctiveGroups = new ArrayList(); Element ringGroup =null; for (int i = groups.size() -1 ; i >=0; i--) { @@ -1771,29 +1968,29 @@ throw new ComponentGenerationException("OPSIN Bug: Two groups exactly should be present at this point when processing conjunctive nomenclature"); } Element primaryConjunctiveGroup =conjunctiveGroups.get(0); - Fragment primaryConjunctiveFrag = state.xmlFragmentMap.get(primaryConjunctiveGroup); + Fragment primaryConjunctiveFrag = primaryConjunctiveGroup.getFrag(); //remove all locants List atomList = primaryConjunctiveFrag.getAtomList(); for (Atom atom : atomList) { atom.clearLocants(); } List suffixes = new ArrayList(); - Element possibleSuffix = (Element) XOMTools.getNextSibling(primaryConjunctiveGroup); + Element possibleSuffix = OpsinTools.getNextSibling(primaryConjunctiveGroup); while (possibleSuffix !=null){ - if (possibleSuffix.getLocalName().equals(SUFFIX_EL)){ + if (possibleSuffix.getName().equals(SUFFIX_EL)){ suffixes.add(possibleSuffix); } - possibleSuffix = (Element) XOMTools.getNextSibling(possibleSuffix); + possibleSuffix = OpsinTools.getNextSibling(possibleSuffix); } preliminaryProcessSuffixes(primaryConjunctiveGroup, suffixes); - resolveSuffixes(primaryConjunctiveGroup, suffixes); - for (Element suffix : suffixes) { - suffix.detach(); - } - 
primaryConjunctiveGroup.setLocalName(CONJUNCTIVESUFFIXGROUP_EL); + suffixApplier.resolveSuffixes(primaryConjunctiveGroup, suffixes); + for (Element suffix : suffixes) { + suffix.detach(); + } + primaryConjunctiveGroup.setName(CONJUNCTIVESUFFIXGROUP_EL); allGroups.remove(primaryConjunctiveGroup); - Element possibleMultiplier = (Element) XOMTools.getPreviousSibling(primaryConjunctiveGroup); + Element possibleMultiplier = OpsinTools.getPreviousSibling(primaryConjunctiveGroup); //label atoms appropriately boolean alphaIsPosition1 = atomList.get(0).getIncomingValency() < 3; int counter =0; @@ -1822,19 +2019,20 @@ } counter++; } - if (MULTIPLIER_EL.equals(possibleMultiplier.getLocalName())){ + if (MULTIPLIER_EL.equals(possibleMultiplier.getName())){ int multiplier = Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR)); for (int i = 1; i < multiplier; i++) { - Element conjunctiveSuffixGroup = new Element(primaryConjunctiveGroup); + Element conjunctiveSuffixGroup = primaryConjunctiveGroup.copy(); Fragment newFragment = state.fragManager.copyAndRelabelFragment(primaryConjunctiveFrag, i); - state.xmlFragmentMap.put(conjunctiveSuffixGroup, newFragment); + newFragment.setTokenEl(conjunctiveSuffixGroup); + conjunctiveSuffixGroup.setFrag(newFragment); conjunctiveGroups.add(conjunctiveSuffixGroup); - XOMTools.insertAfter(primaryConjunctiveGroup, conjunctiveSuffixGroup); + OpsinTools.insertAfter(primaryConjunctiveGroup, conjunctiveSuffixGroup); } - Element possibleLocant =(Element)XOMTools.getPreviousSibling(possibleMultiplier); + Element possibleLocant = OpsinTools.getPreviousSibling(possibleMultiplier); possibleMultiplier.detach(); - if (possibleLocant.getLocalName().equals(LOCANT_EL)){ - String[] locants = MATCH_COMMA.split(possibleLocant.getValue()); + if (possibleLocant.getName().equals(LOCANT_EL)){ + String[] locants = possibleLocant.getValue().split(","); if (locants.length!=multiplier){ throw new ComponentGenerationException("mismatch between number of locants 
and multiplier in conjunctive nomenclature routine"); } @@ -1856,24 +2054,24 @@ * @throws ComponentGenerationException */ private void matchLocantsToDirectFeatures(Element subOrRoot) throws ComponentGenerationException { - List locants = XOMTools.getChildElementsWithTagName(subOrRoot, LOCANT_EL); - List groups = XOMTools.getChildElementsWithTagName(subOrRoot, GROUP_EL); + List locants = subOrRoot.getChildElements(LOCANT_EL); + List groups = subOrRoot.getChildElements(GROUP_EL); for (Element group : groups) { if (group.getAttributeValue(SUBTYPE_ATR).equals(HANTZSCHWIDMAN_SUBTYPE_VAL)){//handle Hantzch-widman systems if (group.getAttribute(ADDBOND_ATR)!=null){//special case for partunsatring //exception for where a locant is supposed to indicate the location of a double bond... - Elements deltas = subOrRoot.getChildElements(DELTA_EL); + List deltas = subOrRoot.getChildElements(DELTA_EL); if (deltas.size()==0){ - Element delta =new Element(DELTA_EL); - Element appropriateLocant = XOMTools.getPreviousSiblingIgnoringCertainElements(group, new String[]{HETEROATOM_EL, MULTIPLIER_EL}); - if (appropriateLocant !=null && appropriateLocant.getLocalName().equals(LOCANT_EL) && MATCH_COMMA.split(appropriateLocant.getValue()).length == 1){ - delta.appendChild(appropriateLocant.getValue()); - XOMTools.insertBefore(appropriateLocant, delta); + Element delta =new TokenEl(DELTA_EL); + Element appropriateLocant = OpsinTools.getPreviousSiblingIgnoringCertainElements(group, new String[]{HETEROATOM_EL, MULTIPLIER_EL}); + if (appropriateLocant !=null && appropriateLocant.getName().equals(LOCANT_EL) && appropriateLocant.getValue().split(",").length == 1){ + delta.setValue(appropriateLocant.getValue()); + OpsinTools.insertBefore(appropriateLocant, delta); appropriateLocant.detach(); locants.remove(appropriateLocant); } else{ - delta.appendChild(""); + delta.setValue(""); subOrRoot.insertChild(delta, 0);//no obvious attempt to set double bond position, potentially ambiguous, valency will be 
used to choose later } } @@ -1881,15 +2079,15 @@ if (locants.size()>0 ){ Element locantBeforeHWSystem = null; List heteroAtoms = new ArrayList(); - int indexOfGroup =subOrRoot.indexOf(group); + int indexOfGroup = subOrRoot.indexOf(group); for (int j = indexOfGroup -1; j >= 0; j--) { - String elName=((Element)subOrRoot.getChild(j)).getLocalName(); + String elName = subOrRoot.getChild(j).getName(); if (elName.equals(LOCANT_EL)){ - locantBeforeHWSystem = (Element)subOrRoot.getChild(j); + locantBeforeHWSystem = subOrRoot.getChild(j); break; } else if(elName.equals(HETEROATOM_EL)){ - Element heteroAtom = (Element)subOrRoot.getChild(j); + Element heteroAtom = subOrRoot.getChild(j); heteroAtoms.add(heteroAtom); if (heteroAtom.getAttribute(LOCANT_ATR)!=null){//locants already assigned, assumedly by process multipliers break; @@ -1901,11 +2099,11 @@ } Collections.reverse(heteroAtoms); if (locantBeforeHWSystem !=null){ - String[] locantValues = MATCH_COMMA.split(locantBeforeHWSystem.getValue()); + String[] locantValues = locantBeforeHWSystem.getValue().split(","); //detect a solitary locant in front of a HW system and prevent it being assigned. //something like 1-aziridin-1-yl never means the N is at position 1 as it is at position 1 by convention //this special case is not applied to pseudo HW like systems e.g. 
[1]oxacyclotetradecine - if (locantValues.length ==1 && state.xmlFragmentMap.get(group).getAtomList().size() <=10){ + if (locantValues.length ==1 && group.getFrag().getAtomCount() <=10){ locants.remove(locantBeforeHWSystem);//don't assign this locant } else { @@ -1918,7 +2116,7 @@ locants.remove(locantBeforeHWSystem); } else if (heteroAtoms.size()>1){ - throw new ComponentGenerationException("Mismatch between number of locants and HW heteroatoms"); + throw new ComponentGenerationException("Mismatch between number of locants and Hantzsch-Widman heteroatoms"); } } } @@ -1936,10 +2134,17 @@ */ private void assignSingleLocantsToAdjacentFeatures(List locants) { for (Element locant : locants) { - String[] locantValues = MATCH_COMMA.split(locant.getValue()); - Element referent = (Element)XOMTools.getNextSibling(locant); - if (referent!=null && locantValues.length==1){ - String refName = referent.getLocalName(); + String[] locantValues = locant.getValue().split(","); + Element referent = OpsinTools.getNextSibling(locant); + if (referent != null && locantValues.length == 1){ + String refName = referent.getName(); + if (refName.equals(ISOTOPESPECIFICATION_EL)) { + referent = OpsinTools.getNextSibling(referent); + if (referent == null) { + return; + } + refName = referent.getName(); + } //Only assigning locants to elements that were not created by a multiplier if(referent.getAttribute(LOCANT_ATR) == null && referent.getAttribute(MULTIPLIED_ATR) == null && (refName.equals(UNSATURATOR_EL) || refName.equals(SUFFIX_EL) || @@ -1964,7 +2169,7 @@ * @throws StructureBuildingException */ private void preliminaryProcessSuffixes(Element group, List suffixes) throws ComponentGenerationException, StructureBuildingException{ - Fragment suffixableFragment =state.xmlFragmentMap.get(group); + Fragment suffixableFragment = group.getFrag(); if (group.getAttribute(SUFFIXAPPLIESTO_ATR)!=null){//typically a trivial polyAcid or aminoAcid processSuffixAppliesTo(group, suffixes,suffixableFragment); 
@@ -1982,19 +2187,18 @@ state.xmlSuffixMap.put(group, suffixFragments); boolean suffixesResolved =false; if (group.getAttributeValue(TYPE_ATR).equals(CHALCOGENACIDSTEM_TYPE_VAL)){//merge the suffix into the chalcogen acid stem e.g sulfonoate needs to be one fragment for infix replacement - resolveSuffixes(group, suffixes); - suffixesResolved =true; - } + suffixApplier.resolveSuffixes(group, suffixes); + suffixesResolved =true; + } processSuffixPrefixes(suffixes);//e.g. carbox amide - FunctionalReplacement.processInfixFunctionalReplacementNomenclature(state, suffixes, suffixFragments); + functionalReplacement.processInfixFunctionalReplacementNomenclature(suffixes, suffixFragments); processRemovalOfHydroxyGroupsRules(suffixes, suffixableFragment); if (group.getValue().equals("oxal")){//oxalic acid is treated as a non carboxylic acid for the purposes of functional replacment. See P-65.2.3 - resolveSuffixes(group, suffixes); - group.getAttribute(TYPE_ATR).setValue(NONCARBOXYLICACID_TYPE_VAL); - suffixableFragment.setType(NONCARBOXYLICACID_TYPE_VAL); - suffixesResolved =true; - } + suffixApplier.resolveSuffixes(group, suffixes); + group.getAttribute(TYPE_ATR).setValue(NONCARBOXYLICACID_TYPE_VAL); + suffixesResolved =true; + } if (suffixesResolved){ //suffixes have already been resolved so need to be detached to avoid being passed to resolveSuffixes later for (int i = suffixes.size() -1; i>=0; i--) { @@ -2017,14 +2221,21 @@ private void applyDefaultLocantsToSuffixesIfApplicable(Element group, Fragment suffixableFragment) { String defaultLocantsAtrValue = group.getAttributeValue(SUFFIXAPPLIESTOBYDEFAULT_ATR); if (defaultLocantsAtrValue != null){ - String[] suffixInstructions = MATCH_COMMA.split(defaultLocantsAtrValue); - int firstIdInFragment = suffixableFragment.getIdOfFirstAtom(); + String[] suffixInstructions = defaultLocantsAtrValue.split(","); Element suffix = OpsinTools.getNextNonChargeSuffix(group); - for (String suffixInstruction : suffixInstructions) { - if 
(suffix !=null){ - suffix.addAttribute(new Attribute(DEFAULTLOCANTID_ATR, Integer.toString(firstIdInFragment + Integer.parseInt(suffixInstruction) -1))); + if (suffix !=null) { + List suffixes = new ArrayList(); + while (suffix != null) { + suffixes.add(suffix); suffix = OpsinTools.getNextNonChargeSuffix(suffix); } + if (suffixInstructions.length == suffixes.size()) { + int firstIdInFragment = suffixableFragment.getIdOfFirstAtom(); + for (int i = 0; i < suffixInstructions.length; i++) { + String suffixInstruction = suffixInstructions[i]; + suffixes.get(i).addAttribute(new Attribute(DEFAULTLOCANTID_ATR, Integer.toString(firstIdInFragment + Integer.parseInt(suffixInstruction) -1))); + } + } } } } @@ -2051,12 +2262,16 @@ } String suffixInstruction =group.getAttributeValue(SUFFIXAPPLIESTO_ATR); - String[] suffixInstructions = MATCH_COMMA.split(suffixInstruction); + String[] suffixInstructions = suffixInstruction.split(","); + int firstIdInFragment=suffixableFragment.getIdOfFirstAtom(); if (CYCLEFORMER_SUBTYPE_VAL.equals(suffix.getAttributeValue(SUBTYPE_ATR))){ if (suffixInstructions.length !=2){ throw new ComponentGenerationException("suffix: " + suffix.getValue() + " used on an inappropriate group"); } - suffix.addAttribute(new Attribute(LOCANTID_ATR, suffixInstruction)); + String[] locantIds = new String[2]; + locantIds[0] = Integer.toString(firstIdInFragment + Integer.parseInt(suffixInstructions[0]) - 1); + locantIds[1] = Integer.toString(firstIdInFragment + Integer.parseInt(suffixInstructions[1]) - 1); + suffix.addAttribute(new Attribute(LOCANTID_ATR, StringTools.arrayToString(locantIds, ","))); return; } boolean symmetricSuffixes =true; @@ -2067,12 +2282,11 @@ symmetricSuffixes = false; } - int firstIdInFragment=suffixableFragment.getIdOfFirstAtom(); if (suffix.getAttribute(LOCANT_ATR)==null){ suffix.addAttribute(new Attribute(LOCANTID_ATR, Integer.toString(firstIdInFragment + Integer.parseInt(suffixInstructions[0]) -1))); } for (int i = 1; i < 
suffixInstructions.length; i++) { - Element newSuffix = new Element(SUFFIX_EL); + Element newSuffix = new TokenEl(SUFFIX_EL); if (symmetricSuffixes){ newSuffix.addAttribute(new Attribute(VALUE_ATR, suffix.getAttributeValue(VALUE_ATR))); newSuffix.addAttribute(new Attribute(TYPE_ATR, suffix.getAttributeValue(TYPE_ATR))); @@ -2088,7 +2302,7 @@ newSuffix.addAttribute(new Attribute(TYPE_ATR, ROOT_EL)); } newSuffix.addAttribute(new Attribute(LOCANTID_ATR, Integer.toString(firstIdInFragment + Integer.parseInt(suffixInstructions[i]) -1))); - XOMTools.insertAfter(suffix, newSuffix); + OpsinTools.insertAfter(suffix, newSuffix); suffixes.add(newSuffix); } } @@ -2108,102 +2322,105 @@ String subgroupType = frag.getSubType(); String suffixTypeToUse =null; - if (suffixRules.isGroupTypeWithSpecificSuffixRules(groupType)){ + if (suffixApplier.isGroupTypeWithSpecificSuffixRules(groupType)){ suffixTypeToUse =groupType; } else{ suffixTypeToUse = STANDARDGROUP_TYPE_VAL; } - for (Element suffix : suffixes) { - String suffixValue = suffix.getAttributeValue(VALUE_ATR); + for (Element suffix : suffixes) { + String suffixValue = suffix.getAttributeValue(VALUE_ATR); + + boolean cyclic;//needed for addSuffixPrefixIfNonePresentAndCyclic rule + Atom atomLikelyToBeUsedBySuffix = null; + + String locant = suffix.getAttributeValue(LOCANT_ATR); + String locantId = suffix.getAttributeValue(LOCANTID_ATR); + + if (locant != null && locant.indexOf(',') == -1) { + atomLikelyToBeUsedBySuffix = frag.getAtomByLocant(locant); + } + else if (locantId != null && locantId.indexOf(',') == -1) { + atomLikelyToBeUsedBySuffix = frag.getAtomByIDOrThrow(Integer.parseInt(locantId)); + } + if (atomLikelyToBeUsedBySuffix==null){ + //a locant has not been specified + //also can happen in the cases of things like fused rings where the final numbering is not available so lookup by locant fails (in which case all the atoms will be cyclic anyway) + atomLikelyToBeUsedBySuffix = frag.getFirstAtom(); + } + cyclic = 
atomLikelyToBeUsedBySuffix.getAtomIsInACycle(); - boolean cyclic;//needed for addSuffixPrefixIfNonePresentAndCyclic rule - Atom atomLikelyToBeUsedBySuffix = null; - - String locant = suffix.getAttributeValue(LOCANT_ATR); - String locantId = suffix.getAttributeValue(LOCANTID_ATR); - - if (locant != null && locant.indexOf(',') == -1) { - atomLikelyToBeUsedBySuffix = frag.getAtomByLocant(locant); - } - else if (locantId != null && locantId.indexOf(',') == -1) { - atomLikelyToBeUsedBySuffix = frag.getAtomByIDOrThrow(Integer.parseInt(locantId)); - } - if (atomLikelyToBeUsedBySuffix==null){ - //a locant has not been specified - //also can happen in the cases of things like fused rings where the final numbering is not available so lookup by locant fails (in which case all the atoms will be cyclic anyway) - atomLikelyToBeUsedBySuffix = frag.getFirstAtom(); - } - cyclic = atomLikelyToBeUsedBySuffix.getAtomIsInACycle(); - - Elements suffixRuleTags = suffixRules.getSuffixRuleTags(suffixTypeToUse, suffixValue, subgroupType); - Fragment suffixFrag = null; - /* - * Temp fragments are build for each addGroup rule and then merged into suffixFrag - */ - for (int j = 0; j < suffixRuleTags.size(); j++) { - Element suffixRuleTag = suffixRuleTags.get(j); - String suffixRuleTagName = suffixRuleTag.getLocalName(); - if (suffixRuleTagName.equals(SUFFIXRULES_ADDGROUP_EL)) { - String labels = NONE_LABELS_VAL; - if (suffixRuleTag.getAttribute(SUFFIXRULES_LABELS_ATR) != null) { - labels = suffixRuleTag.getAttributeValue(SUFFIXRULES_LABELS_ATR); - } - suffixFrag = state.fragManager.buildSMILES(suffixRuleTag.getAttributeValue(SUFFIXRULES_SMILES_ATR), SUFFIX_TYPE_VAL, SUFFIX_SUBTYPE_VAL, labels); - List atomList = suffixFrag.getAtomList(); - if (suffixRuleTag.getAttribute(SUFFIXRULES_FUNCTIONALIDS_ATR) != null) { - String[] relativeIdsOfFunctionalAtoms = MATCH_COMMA.split(suffixRuleTag.getAttributeValue(SUFFIXRULES_FUNCTIONALIDS_ATR)); - for (String relativeId : relativeIdsOfFunctionalAtoms) { - 
int atomIndice = Integer.parseInt(relativeId) -1; - if (atomIndice >=atomList.size()){ - throw new StructureBuildingException("Check suffixRules.xml: Atom requested to have a functionalAtom was not within the suffix fragment"); - } - suffixFrag.addFunctionalAtom(atomList.get(atomIndice)); - } - } - if (suffixRuleTag.getAttribute(SUFFIXRULES_OUTIDS_ATR) != null) { - String[] relativeIdsOfOutAtoms = MATCH_COMMA.split(suffixRuleTag.getAttributeValue(SUFFIXRULES_OUTIDS_ATR)); - for (String relativeId : relativeIdsOfOutAtoms) { - int atomIndice = Integer.parseInt(relativeId) -1; - if (atomIndice >=atomList.size()){ - throw new StructureBuildingException("Check suffixRules.xml: Atom requested to have a outAtom was not within the suffix fragment"); - } - suffixFrag.addOutAtom(atomList.get(atomIndice), 1 , true); - } - } - } - else if (suffixRuleTagName.equals(SUFFIXRULES_ADDSUFFIXPREFIXIFNONEPRESENTANDCYCLIC_EL)){ - if (cyclic && suffix.getAttribute(SUFFIXPREFIX_ATR)==null){ - suffix.addAttribute(new Attribute(SUFFIXPREFIX_ATR, suffixRuleTag.getAttributeValue(SUFFIXRULES_SMILES_ATR))); - } - } - else if (suffixRuleTagName.equals(SUFFIXRULES_ADDFUNCTIONALATOMSTOHYDROXYGROUPS_EL)){ + List suffixRules = suffixApplier.getSuffixRuleTags(suffixTypeToUse, suffixValue, subgroupType); + Fragment suffixFrag = null; + /* + * Temp fragments are build for each addGroup rule and then merged into suffixFrag + */ + for (SuffixRule suffixRule : suffixRules) { + switch (suffixRule.getType()) { + case addgroup: + String labels = suffixRule.getAttributeValue(SUFFIXRULES_LABELS_ATR); + if (labels == null) { + labels = NONE_LABELS_VAL; + } + suffixFrag = state.fragManager.buildSMILES(suffixRule.getAttributeValue(SUFFIXRULES_SMILES_ATR), SUFFIX_TYPE_VAL, labels); + List atomList = suffixFrag.getAtomList(); + String functionalIdsAtr = suffixRule.getAttributeValue(SUFFIXRULES_FUNCTIONALIDS_ATR); + if (functionalIdsAtr != null) { + String[] relativeIdsOfFunctionalAtoms = 
functionalIdsAtr.split(","); + for (String relativeId : relativeIdsOfFunctionalAtoms) { + int atomIndice = Integer.parseInt(relativeId) -1; + if (atomIndice >=atomList.size()){ + throw new StructureBuildingException("Check suffixRules.xml: Atom requested to have a functionalAtom was not within the suffix fragment"); + } + suffixFrag.addFunctionalAtom(atomList.get(atomIndice)); + } + } + String outIdsAtr = suffixRule.getAttributeValue(SUFFIXRULES_OUTIDS_ATR); + if (outIdsAtr != null) { + String[] relativeIdsOfOutAtoms = outIdsAtr.split(","); + for (String relativeId : relativeIdsOfOutAtoms) { + int atomIndice = Integer.parseInt(relativeId) -1; + if (atomIndice >=atomList.size()){ + throw new StructureBuildingException("Check suffixRules.xml: Atom requested to have a outAtom was not within the suffix fragment"); + } + suffixFrag.addOutAtom(atomList.get(atomIndice), 1 , true); + } + } + break; + case addSuffixPrefixIfNonePresentAndCyclic: + if (cyclic && suffix.getAttribute(SUFFIXPREFIX_ATR)==null){ + suffix.addAttribute(new Attribute(SUFFIXPREFIX_ATR, suffixRule.getAttributeValue(SUFFIXRULES_SMILES_ATR))); + } + break; + case addFunctionalAtomsToHydroxyGroups: if (suffixFrag != null){ throw new ComponentGenerationException("addFunctionalAtomsToHydroxyGroups is not currently compatable with the addGroup suffix rule"); } addFunctionalAtomsToHydroxyGroups(atomLikelyToBeUsedBySuffix); - } - else if (suffixRuleTagName.equals(SUFFIXRULES_CHARGEHYDROXYGROUPS_EL)){ + break; + case chargeHydroxyGroups: if (suffixFrag != null){ throw new ComponentGenerationException("chargeHydroxyGroups is not currently compatable with the addGroup suffix rule"); } chargeHydroxyGroups(atomLikelyToBeUsedBySuffix); - - } - else if (suffixRuleTagName.equals(SUFFIXRULES_REMOVETERMINALOXYGEN_EL)){ + break; + case removeTerminalOxygen: if (suffixFrag != null){ throw new ComponentGenerationException("removeTerminalOxygen is not currently compatible with the addGroup suffix rule"); } - int bondOrder = 
Integer.parseInt(suffixRuleTag.getAttributeValue(SUFFIXRULES_ORDER_ATR)); - removeTerminalOxygen(atomLikelyToBeUsedBySuffix, bondOrder); + int bondOrder = Integer.parseInt(suffixRule.getAttributeValue(SUFFIXRULES_ORDER_ATR)); + FragmentTools.removeTerminalOxygen(state, atomLikelyToBeUsedBySuffix, bondOrder); + break; + default: + break; } - } - if (suffixFrag != null) { + } + if (suffixFrag != null) { suffixFragments.add(suffixFrag); - state.xmlFragmentMap.put(suffix, suffixFrag); - } - } + suffix.setFrag(suffixFrag); + } + } return suffixFragments; } @@ -2213,32 +2430,30 @@ * @param suffixes The suffix elements for a fragment. * @param frag The fragment to which the suffix will be applied * @throws ComponentGenerationException - * @throws StructureBuildingException */ - private void processRemovalOfHydroxyGroupsRules(List suffixes, Fragment frag) throws ComponentGenerationException, StructureBuildingException{ + private void processRemovalOfHydroxyGroupsRules(List suffixes, Fragment frag) throws ComponentGenerationException { String groupType = frag.getType(); String subgroupType = frag.getSubType(); String suffixTypeToUse =null; - if (suffixRules.isGroupTypeWithSpecificSuffixRules(groupType)){ + if (suffixApplier.isGroupTypeWithSpecificSuffixRules(groupType)) { suffixTypeToUse =groupType; } else{ suffixTypeToUse = STANDARDGROUP_TYPE_VAL; } - for (Element suffix : suffixes) { - String suffixValue = suffix.getAttributeValue(VALUE_ATR); - Elements suffixRuleTags = suffixRules.getSuffixRuleTags(suffixTypeToUse, suffixValue, subgroupType); - for (int j = 0; j < suffixRuleTags.size(); j++) { - Element suffixRuleTag = suffixRuleTags.get(j); - String suffixRuleTagName = suffixRuleTag.getLocalName(); - if (suffixRuleTagName.equals(SUFFIXRULES_CONVERTHYDROXYGROUPSTOOUTATOMS_EL)){ + for (Element suffix : suffixes) { + String suffixValue = suffix.getAttributeValue(VALUE_ATR); + List suffixRules = suffixApplier.getSuffixRuleTags(suffixTypeToUse, suffixValue, subgroupType); + 
for (SuffixRule suffixRule : suffixRules) { + SuffixRuleType type =suffixRule.getType(); + if (type == SuffixRuleType.convertHydroxyGroupsToOutAtoms) { convertHydroxyGroupsToOutAtoms(frag); } - else if (suffixRuleTagName.equals(SUFFIXRULES_CONVERTHYDROXYGROUPSTOPOSITIVECHARGE_EL)){ + else if (type == SuffixRuleType.convertHydroxyGroupsToPositiveCharge) { convertHydroxyGroupsToPositiveCharge(frag); } - } - } + } + } } /** @@ -2249,7 +2464,7 @@ private void addFunctionalAtomsToHydroxyGroups(Atom atom) throws StructureBuildingException { List neighbours = atom.getAtomNeighbours(); for (Atom neighbour : neighbours) { - if (neighbour.getElement().equals("O") && neighbour.getCharge()==0 && neighbour.getAtomNeighbours().size()==1 && atom.getBondToAtomOrThrow(neighbour).getOrder()==1){ + if (neighbour.getElement() == ChemEl.O && neighbour.getCharge() == 0 && neighbour.getBondCount() == 1 && atom.getBondToAtomOrThrow(neighbour).getOrder() == 1){ neighbour.getFrag().addFunctionalAtom(neighbour); } } @@ -2263,69 +2478,24 @@ private void chargeHydroxyGroups(Atom atom) throws StructureBuildingException { List neighbours = atom.getAtomNeighbours(); for (Atom neighbour : neighbours) { - if (neighbour.getElement().equals("O") && neighbour.getCharge()==0 && neighbour.getAtomNeighbours().size()==1 && atom.getBondToAtomOrThrow(neighbour).getOrder()==1){ + if (neighbour.getElement() == ChemEl.O && neighbour.getCharge()==0 && neighbour.getBondCount()==1 && atom.getBondToAtomOrThrow(neighbour).getOrder()==1){ neighbour.addChargeAndProtons(-1, -1); } } } - - /** - * Removes a terminal oxygen from the atom - * An exception is thrown if no suitable oxygen could be found connected to the atom - * Note that [N+][O-] is treated as N=O - * @param atom - * @throws StructureBuildingException - */ - private void removeTerminalOxygen(Atom atom, int desiredBondOrder) throws StructureBuildingException { - //TODO prioritise [N+][O-] - List neighbours = atom.getAtomNeighbours(); - for (Atom neighbour 
: neighbours) { - if (neighbour.getElement().equals("O") && neighbour.getAtomNeighbours().size()==1){ - Bond b = atom.getBondToAtomOrThrow(neighbour); - if (b.getOrder()==desiredBondOrder && neighbour.getCharge()==0){ - FragmentTools.removeTerminalAtom(state, neighbour); - if (atom.getLambdaConventionValency()!=null){//corrects valency for phosphin/arsin/stibin - atom.setLambdaConventionValency(atom.getLambdaConventionValency()-desiredBondOrder); - } - if (atom.getMinimumValency()!=null){//corrects valency for phosphin/arsin/stibin - atom.setMinimumValency(atom.getMinimumValency()-desiredBondOrder); - } - return; - } - else if (neighbour.getCharge() ==-1 && b.getOrder()==1 && desiredBondOrder == 2){ - if (atom.getCharge() ==1 && atom.getElement().equals("N")){ - FragmentTools.removeTerminalAtom(state, neighbour); - atom.neutraliseCharge(); - return; - } - } - } - } - if (desiredBondOrder ==2){ - throw new StructureBuildingException("Double bonded oxygen not found at suffix attachment position. Perhaps a suffix has been used inappropriately"); - } - else if (desiredBondOrder ==1){ - throw new StructureBuildingException("Hydroxy oxygen not found at suffix attachment position. Perhaps a suffix has been used inappropriately"); - } - else { - throw new StructureBuildingException("Suitable oxygen not found at suffix attachment position Perhaps a suffix has been used inappropriately"); - } - - } /** * Given a fragment removes all hydroxy groups and adds a valency 1 outAtom to the adjacent atom for each hydroxy group * Note that O[OH] is not considered a hydroxy c.f. 
carbonoperoxoyl * @param frag - * @throws StructureBuildingException */ - private void convertHydroxyGroupsToOutAtoms(Fragment frag) throws StructureBuildingException { + private void convertHydroxyGroupsToOutAtoms(Fragment frag) { List atomList = frag.getAtomList(); for (Atom atom : atomList) { - if (atom.getElement().equals("O") && atom.getCharge()==0 && atom.getBonds().size()==1 && + if (atom.getElement() == ChemEl.O && atom.getCharge()==0 && atom.getBondCount()==1 && atom.getFirstBond().getOrder()==1 && atom.getOutValency() == 0){ Atom adjacentAtom = atom.getAtomNeighbours().get(0); - if (!adjacentAtom.getElement().equals("O")){ + if (adjacentAtom.getElement() != ChemEl.O){ state.fragManager.removeAtomAndAssociatedBonds(atom); frag.addOutAtom(adjacentAtom, 1, true); } @@ -2337,15 +2507,14 @@ * Given a fragment removes all hydroxy groups and applies ylium to the adjacent atom (+1 charge -1 proton) * Note that O[OH] is not considered a hydroxy * @param frag - * @throws StructureBuildingException */ - private void convertHydroxyGroupsToPositiveCharge(Fragment frag) throws StructureBuildingException { + private void convertHydroxyGroupsToPositiveCharge(Fragment frag) { List atomList = frag.getAtomList(); for (Atom atom : atomList) { - if (atom.getElement().equals("O") && atom.getCharge()==0 && atom.getBonds().size()==1 && + if (atom.getElement() == ChemEl.O && atom.getCharge()==0 && atom.getBondCount()==1 && atom.getFirstBond().getOrder()==1 && atom.getOutValency() == 0){ Atom adjacentAtom = atom.getAtomNeighbours().get(0); - if (!adjacentAtom.getElement().equals("O")){ + if (adjacentAtom.getElement() != ChemEl.O){ state.fragManager.removeAtomAndAssociatedBonds(atom); adjacentAtom.addChargeAndProtons(1, -1); } @@ -2369,7 +2538,7 @@ } Atom firstAtomOfPrefix = suffixPrefixFrag.getFirstAtom(); firstAtomOfPrefix.addLocant("X");//make sure this atom is not given a locant - Fragment suffixFrag = state.xmlFragmentMap.get(suffix); + Fragment suffixFrag = suffix.getFrag(); 
state.fragManager.incorporateFragment(suffixPrefixFrag, suffixFrag); //manipulate suffixFrag such that all the bonds to the first atom (the R) go instead to the first atom of suffixPrefixFrag. @@ -2387,37 +2556,38 @@ } /** - * Checks through the groups accesible from the startingElement taking into account brackets (i.e. those that it is feasible that the group of the startingElement could substitute onto). - * It is assumed that one does not intentionally locant onto something in a deeper level of bracketting (not implicit bracketing). e.g. 2-propyl(ethyl)ammonia will give prop-2-yl + * Checks through the groups accessible from the startingElement taking into account brackets (i.e. those that it is feasible that the group of the startingElement could substitute onto). + * It is assumed that one does not intentionally locant onto something in a deeper level of bracketing (not implicit bracketing). e.g. 2-propyl(ethyl)ammonia will give prop-2-yl + * @param state * @param startingElement * @param locant: the locant string to check for the presence of * @return whether the locant was found * @throws StructureBuildingException */ - private boolean checkLocantPresentOnPotentialRoot(Element startingElement, String locant) throws StructureBuildingException { + static boolean checkLocantPresentOnPotentialRoot(BuildState state, Element startingElement, String locant) throws StructureBuildingException { boolean foundSibling =false; - Stack s = new Stack(); + Deque s = new ArrayDeque(); s.add(startingElement); boolean doneFirstIteration =false;//check on index only done on first iteration to only get elements with an index greater than the starting element while (s.size()>0){ - Element currentElement =s.pop(); - Element parent = (Element)currentElement.getParent(); - List siblings = XOMTools.getChildElementsWithTagNames(parent, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); + Element currentElement =s.removeLast(); + Element parent = currentElement.getParent(); + List 
siblings = OpsinTools.getChildElementsWithTagNames(parent, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); int indexOfCurrentElement =parent.indexOf(currentElement); for (Element bracketOrSub : siblings) { if (!doneFirstIteration && parent.indexOf(bracketOrSub) <= indexOfCurrentElement){ continue; } - if (bracketOrSub.getLocalName().equals(BRACKET_EL)){//only want to consider implicit brackets, not proper brackets + if (bracketOrSub.getName().equals(BRACKET_EL)){//only want to consider implicit brackets, not proper brackets if (bracketOrSub.getAttribute(TYPE_ATR)==null){ continue; } - s.push((Element)bracketOrSub.getChild(0)); + s.add(bracketOrSub.getChild(0)); } else{ Element group = bracketOrSub.getFirstChildElement(GROUP_EL); - Fragment groupFrag =state.xmlFragmentMap.get(group); + Fragment groupFrag = group.getFrag(); if (groupFrag.hasLocant(locant)){ return true; } @@ -2429,9 +2599,9 @@ } } } - List conjunctiveGroups = XOMTools.getNextSiblingsOfType(group, CONJUNCTIVESUFFIXGROUP_EL); + List conjunctiveGroups = OpsinTools.getNextSiblingsOfType(group, CONJUNCTIVESUFFIXGROUP_EL); for (Element conjunctiveGroup : conjunctiveGroups) { - if (state.xmlFragmentMap.get(conjunctiveGroup).hasLocant(locant)){ + if (conjunctiveGroup.getFrag().hasLocant(locant)){ return true; } } @@ -2442,25 +2612,25 @@ } if (!foundSibling){//Special case: anything the group could potentially substitute onto is in a bracket. 
The bracket is checked recursively - s = new Stack(); + s = new ArrayDeque(); s.add(startingElement); doneFirstIteration =false;//check on index only done on first iteration to only get elements with an index greater than the starting element while (s.size()>0){ - Element currentElement =s.pop(); - Element parent = (Element)currentElement.getParent(); - List siblings = XOMTools.getChildElementsWithTagNames(parent, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); + Element currentElement =s.removeLast(); + Element parent = currentElement.getParent(); + List siblings = OpsinTools.getChildElementsWithTagNames(parent, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); int indexOfCurrentElement =parent.indexOf(currentElement); for (Element bracketOrSub : siblings) { if (!doneFirstIteration && parent.indexOf(bracketOrSub) <= indexOfCurrentElement){ continue; } - if (bracketOrSub.getLocalName().equals(BRACKET_EL)){ - s.push((Element)bracketOrSub.getChild(0)); + if (bracketOrSub.getName().equals(BRACKET_EL)){ + s.add(bracketOrSub.getChild(0)); } else{ Element group = bracketOrSub.getFirstChildElement(GROUP_EL); - Fragment groupFrag =state.xmlFragmentMap.get(group); + Fragment groupFrag = group.getFrag(); if (groupFrag.hasLocant(locant)){ return true; } @@ -2472,9 +2642,9 @@ } } } - List conjunctiveGroups = XOMTools.getNextSiblingsOfType(group, CONJUNCTIVESUFFIXGROUP_EL); + List conjunctiveGroups = OpsinTools.getNextSiblingsOfType(group, CONJUNCTIVESUFFIXGROUP_EL); for (Element conjunctiveGroup : conjunctiveGroups) { - if (state.xmlFragmentMap.get(conjunctiveGroup).hasLocant(locant)){ + if (conjunctiveGroup.getFrag().hasLocant(locant)){ return true; } } @@ -2488,6 +2658,7 @@ } /** Handles special cases in IUPAC nomenclature that are most elegantly solved by modification of the fragment + * Also sets the default in atom for alkanes so that say methylethyl is prop-2-yl rather than propyl * @param groups * @throws StructureBuildingException * @throws 
ComponentGenerationException @@ -2496,7 +2667,7 @@ for (Element group : groups) { String groupValue =group.getValue(); if (groupValue.equals("porphyrin")|| groupValue.equals("porphin")){ - List hydrogenAddingEls = XOMTools.getChildElementsWithTagName((Element) group.getParent(), INDICATEDHYDROGEN_EL); + List hydrogenAddingEls = group.getParent().getChildElements(INDICATEDHYDROGEN_EL); boolean implicitHydrogenExplicitlySet =false; for (Element hydrogenAddingEl : hydrogenAddingEls) { String locant = hydrogenAddingEl.getAttributeValue(LOCANT_ATR); @@ -2507,20 +2678,71 @@ if (!implicitHydrogenExplicitlySet){ //porphyrins implicitly have indicated hydrogen at the 21/23 positions //directly modify the fragment to avoid problems with locants in for example ring assemblies - Fragment frag =state.xmlFragmentMap.get(group); + Fragment frag = group.getFrag(); frag.getAtomByLocantOrThrow("21").setSpareValency(false); frag.getAtomByLocantOrThrow("23").setSpareValency(false); } } - else if (groupValue.equals("xanthate") || groupValue.equals("xanthic acid") || groupValue.equals("xanthicacid")){ + else if (groupValue.equals("xanthate") || groupValue.equals("xanthat") || groupValue.equals("xanthic acid") || groupValue.equals("xanthicacid")){ //This test needs to be here rather in the ComponentGenerator to correctly reject non substituted thioxanthates Element wordRule = OpsinTools.getParentWordRule(group); if (wordRule.getAttributeValue(WORDRULE_ATR).equals(WordRule.simple.toString())){ - if (XOMTools.getDescendantElementsWithTagName(wordRule, SUBSTITUENT_EL).size()==0){ + if (OpsinTools.getDescendantElementsWithTagName(wordRule, SUBSTITUENT_EL).size()==0){ throw new ComponentGenerationException(groupValue +" describes a class of compounds rather than a particular compound"); } } } + else if (groupValue.equals("adenosin") || groupValue.equals("cytidin") || groupValue.equals("guanosin") || groupValue.equals("inosin") + || groupValue.equals("uridin") || 
groupValue.equals("xanthosin")){ + //These groups are 2'-deoxy by convention + Element previous = OpsinTools.getPreviousSibling(group); + if (previous != null && previous.getName().equals(SUBTRACTIVEPREFIX_EL) && + previous.getAttributeValue(TYPE_ATR).equals(DEOXY_TYPE_VAL) && previous.getAttributeValue(VALUE_ATR).equals("O") + && previous.getAttribute(LOCANT_ATR) == null) { + Element prev2 = OpsinTools.getPrevious(previous); + if (prev2 == null || !prev2.getName().equals(SUBTRACTIVEPREFIX_EL)) { + Fragment frag = group.getFrag(); + StructureBuildingMethods.applySubtractivePrefix(state, frag, ChemEl.O, "2'"); + previous.detach(); + } + } + } + + if ("yes".equals(group.getAttributeValue(USABLEASJOINER_ATR)) + && group.getAttribute(DEFAULTINID_ATR) == null + && group.getAttribute(DEFAULTINLOCANT_ATR) == null) { + //makes linkers by default attach end to end + Fragment frag = group.getFrag(); + int chainLength = frag.getChainLength(); + if (chainLength > 1){ + boolean connectEndToEndWithPreviousSub = true; + if (group.getAttributeValue(TYPE_ATR).equals(CHAIN_TYPE_VAL) && ALKANESTEM_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){//don't do this if the group is preceded by another alkaneStem e.g. 
methylethyl makes more sense as prop-2-yl rather than propyl + Element previousSubstituent = OpsinTools.getPreviousSibling(group.getParent()); + if (previousSubstituent != null){ + List previousSubstGroups = previousSubstituent.getChildElements(GROUP_EL); + if (previousSubstGroups.size() == 1){ + Element previousGroup = previousSubstGroups.get(0); + if (previousGroup.getAttributeValue(TYPE_ATR).equals(CHAIN_TYPE_VAL) && ALKANESTEM_SUBTYPE_VAL.equals(previousGroup.getAttributeValue(SUBTYPE_ATR))){ + Element suffixAfterGroup = OpsinTools.getNextSibling(previousGroup, SUFFIX_EL); + if (suffixAfterGroup == null || suffixAfterGroup.getFrag() == null || suffixAfterGroup.getFrag().getOutAtomCount() == 0){ + connectEndToEndWithPreviousSub = false; + } + } + } + } + } + if (connectEndToEndWithPreviousSub){ + Element parent = group.getParent(); + while (parent.getName().equals(BRACKET_EL)) { + parent = parent.getParent(); + } + if (!parent.getName().equals(ROOT_EL)) { + group.addAttribute(new Attribute(DEFAULTINID_ATR, Integer.toString(chainLength))); + frag.setDefaultInAtom(frag.getAtomByLocantOrThrow(Integer.toString(chainLength))); + } + } + } + } } } @@ -2532,162 +2754,186 @@ * @throws ComponentGenerationException */ private void processHW(Element subOrRoot) throws StructureBuildingException, ComponentGenerationException{ - List hwGroups = XOMTools.getChildElementsWithTagNameAndAttribute(subOrRoot, GROUP_EL, SUBTYPE_ATR, HANTZSCHWIDMAN_SUBTYPE_VAL); + List hwGroups = OpsinTools.getChildElementsWithTagNameAndAttribute(subOrRoot, GROUP_EL, SUBTYPE_ATR, HANTZSCHWIDMAN_SUBTYPE_VAL); for (Element group : hwGroups) { - Fragment hwRing =state.xmlFragmentMap.get(group); + Fragment hwRing = group.getFrag(); List atomList =hwRing.getAtomList(); - Element prev = (Element) XOMTools.getPreviousSibling(group); - ArrayList prevs = new ArrayList(); boolean noLocants = true; - while(prev != null && prev.getLocalName().equals(HETEROATOM_EL)) { + List prevs = new ArrayList(); + Element 
prev = OpsinTools.getPreviousSibling(group); + while(prev != null && prev.getName().equals(HETEROATOM_EL)) { prevs.add(prev); if(prev.getAttribute(LOCANT_ATR) != null) { noLocants = false; } - prev = (Element) XOMTools.getPreviousSibling(prev); + prev = OpsinTools.getPreviousSibling(prev); } + Collections.reverse(prevs); + List heteroatomsToProcess = prevs; + if (atomList.size() == 6 && group.getValue().equals("an")){ boolean hasNitrogen = false; - boolean hasSiorGeorSnorPb=false; - boolean saturatedRing =true; - for(Element heteroatom : prevs){ + boolean hasSiorGeorSnorPb = false; + boolean saturatedRing = true; + for(Element heteroatom : heteroatomsToProcess){ String heteroAtomElement =heteroatom.getAttributeValue(VALUE_ATR); Matcher m = MATCH_ELEMENT_SYMBOL.matcher(heteroAtomElement); if (!m.find()){ - throw new ComponentGenerationException("Failed to extract element from HW heteroatom"); + throw new ComponentGenerationException("Failed to extract element from Hantzsch-Widman heteroatom"); } heteroAtomElement = m.group(); if (heteroAtomElement.equals("N")){ - hasNitrogen=true; + hasNitrogen = true; } if (heteroAtomElement.equals("Si") || heteroAtomElement.equals("Ge") || heteroAtomElement.equals("Sn") || heteroAtomElement.equals("Pb") ){ - hasSiorGeorSnorPb =true; + hasSiorGeorSnorPb = true; } } for (Atom a: atomList) { if (a.hasSpareValency()){ - saturatedRing =false; + saturatedRing = false; } } if (saturatedRing && !hasNitrogen && hasSiorGeorSnorPb){ - throw new ComponentGenerationException("Blocked HW system (6 member saturated ring with no nitrogen but has Si/Ge/Sn/Pb)"); + throw new ComponentGenerationException("Blocked Hantzsch-Widman system (6 member saturated ring with no nitrogen but has Si/Ge/Sn/Pb)"); } } StringBuilder nameSB = new StringBuilder(); - Collections.reverse(prevs); - for(Element heteroatom : prevs){ - nameSB.append(heteroatom.getValue()); + for(Element heteroatom : heteroatomsToProcess) { + String hetValue = heteroatom.getValue(); + if 
(hetValue.endsWith("a")) { + nameSB.append(hetValue.substring(0, hetValue.length() - 1)); + } + else { + nameSB.append(hetValue); + } } nameSB.append(group.getValue()); - String name = nameSB.toString().toLowerCase(); - if(noLocants && prevs.size() > 0) { - if(specialHWRings.containsKey(name)) { - String[] specialRingInformation =specialHWRings.get(name); + String name = nameSB.toString(); + group.setValue(name); + if(noLocants && heteroatomsToProcess.size() > 0) { + String[] specialRingInformation = specialHWRings.get(name); + if(specialRingInformation != null) { String specialInstruction =specialRingInformation[0]; if (!specialInstruction.equals("")){ if (specialInstruction.equals("blocked")){ - throw new ComponentGenerationException("Blocked HW system"); + throw new ComponentGenerationException("Blocked Hantzsch-Widman system"); } else if (specialInstruction.equals("saturated")){ - for (Atom a: hwRing.getAtomList()) { + for (Atom a: atomList) { a.setSpareValency(false); } } else if (specialInstruction.equals("not_icacid")){ - if (group.getAttribute(SUBSEQUENTUNSEMANTICTOKEN_ATR)==null){ - Element nextEl = (Element) XOMTools.getNextSibling(group); - if (nextEl!=null && nextEl.getLocalName().equals(SUFFIX_EL) && nextEl.getAttribute(LOCANT_ATR)==null && nextEl.getAttributeValue(VALUE_ATR).equals("ic")){ - throw new ComponentGenerationException(name + nextEl.getValue() +" appears to be a generic class name, not a HW ring"); + if (group.getAttribute(SUBSEQUENTUNSEMANTICTOKEN_ATR) == null){ + Element nextEl = OpsinTools.getNextSibling(group); + if (nextEl != null && nextEl.getName().equals(SUFFIX_EL) && nextEl.getAttribute(LOCANT_ATR) == null && nextEl.getAttributeValue(VALUE_ATR).equals("ic")){ + throw new ComponentGenerationException(name + nextEl.getValue() +" appears to be a generic class name, not a Hantzsch-Widman ring"); } } } else if (specialInstruction.equals("not_nothingOrOlate")){ - if (group.getAttribute(SUBSEQUENTUNSEMANTICTOKEN_ATR)==null){ - Element 
nextEl = (Element) XOMTools.getNextSibling(group); - if (nextEl==null || (nextEl!=null && nextEl.getLocalName().equals(SUFFIX_EL) && nextEl.getAttribute(LOCANT_ATR)==null && nextEl.getAttributeValue(VALUE_ATR).equals("ate"))){ - throw new ComponentGenerationException(name +" has the syntax for a HW ring but probably does not mean that in this context"); + if (group.getAttribute(SUBSEQUENTUNSEMANTICTOKEN_ATR) == null){ + Element nextEl = OpsinTools.getNextSibling(group); + if (nextEl==null || (nextEl!=null && nextEl.getName().equals(SUFFIX_EL) && nextEl.getAttribute(LOCANT_ATR)==null && nextEl.getAttributeValue(VALUE_ATR).equals("ate"))){ + throw new ComponentGenerationException(name +" has the syntax for a Hantzsch-Widman ring but probably does not mean that in this context"); } } } else{ - throw new ComponentGenerationException("OPSIN Bug: Unrecognised special HW ring instruction"); + throw new ComponentGenerationException("OPSIN Bug: Unrecognised special Hantzsch-Widman ring instruction"); } } //something like oxazole where by convention locants go 1,3 or a inorganic HW-like system - for (int j = 1; j < specialRingInformation.length; j++) { - Atom a =hwRing.getAtomByLocantOrThrow(Integer.toString(j)); - a.setElement(specialRingInformation[j]); + for (int i = 1; i < specialRingInformation.length; i++) { + Atom a = hwRing.getAtomByLocantOrThrow(Integer.toString(i)); + a.setElement(ChemEl.valueOf(specialRingInformation[i])); } - for(Element p : prevs){ + for(Element p : heteroatomsToProcess){ p.detach(); } - prevs.clear(); + heteroatomsToProcess.clear(); } } - HashSet elementsToRemove =new HashSet(); - for(Element heteroatom : prevs){//add locanted heteroatoms - if (heteroatom.getAttribute(LOCANT_ATR) !=null){ - String locant =heteroatom.getAttributeValue(LOCANT_ATR); - String elementReplacement =heteroatom.getAttributeValue(VALUE_ATR); - Matcher m = MATCH_ELEMENT_SYMBOL.matcher(elementReplacement); - if (!m.find()){ - throw new ComponentGenerationException("Failed 
to extract element from HW heteroatom"); - } - elementReplacement = m.group(); - Atom a =hwRing.getAtomByLocantOrThrow(locant); - a.setElement(elementReplacement); - if (heteroatom.getAttribute(LAMBDA_ATR)!=null){ - a.setLambdaConventionValency(Integer.parseInt(heteroatom.getAttributeValue(LAMBDA_ATR))); - } - heteroatom.detach(); - elementsToRemove.add(heteroatom); + + //add locanted heteroatoms + for (Iterator it = heteroatomsToProcess.iterator(); it.hasNext();) { + Element heteroatom = it.next(); + String locant = heteroatom.getAttributeValue(LOCANT_ATR); + if (locant == null) { + continue; } - } - for(Element p : elementsToRemove){ - prevs.remove(p); - } - - //add unlocanted heteroatoms - int defaultLocant=1; - for(Element heteroatom : prevs){ - String elementReplacement =heteroatom.getAttributeValue(VALUE_ATR); + String elementReplacement = heteroatom.getAttributeValue(VALUE_ATR); Matcher m = MATCH_ELEMENT_SYMBOL.matcher(elementReplacement); if (!m.find()){ - throw new ComponentGenerationException("Failed to extract element from HW heteroatom"); + throw new ComponentGenerationException("Failed to extract element from Hantzsch-Widman heteroatom"); } elementReplacement = m.group(); - - while (!hwRing.getAtomByLocantOrThrow(Integer.toString(defaultLocant)).getElement().equals("C")){ - defaultLocant++; - } - Atom a =hwRing.getAtomByLocantOrThrow(Integer.toString(defaultLocant)); - a.setElement(elementReplacement); - if (heteroatom.getAttribute(LAMBDA_ATR)!=null){ + Atom a = hwRing.getAtomByLocantOrThrow(locant); + a.setElement(ChemEl.valueOf(elementReplacement)); + if (heteroatom.getAttribute(LAMBDA_ATR) != null){ a.setLambdaConventionValency(Integer.parseInt(heteroatom.getAttributeValue(LAMBDA_ATR))); } heteroatom.detach(); + it.remove(); } - - Elements deltas = subOrRoot.getChildElements(DELTA_EL);//add specified double bonds - for (int j = 0; j < deltas.size(); j++) { - String locantOfDoubleBond = deltas.get(j).getValue(); + + List deltaEls = 
subOrRoot.getChildElements(DELTA_EL); + //add locanted double bonds and convert unlocanted to unsaturators + for (Element deltaEl : deltaEls) { + String locantOfDoubleBond = deltaEl.getValue(); if (locantOfDoubleBond.equals("")){ - Element newUnsaturator = new Element(UNSATURATOR_EL); + Element newUnsaturator = new TokenEl(UNSATURATOR_EL); newUnsaturator.addAttribute(new Attribute(VALUE_ATR, "2")); - XOMTools.insertAfter(group, newUnsaturator); + OpsinTools.insertAfter(group, newUnsaturator); } else{ Atom firstInDoubleBond = hwRing.getAtomByLocantOrThrow(locantOfDoubleBond); - Atom secondInDoubleBond = hwRing.getAtomByIDOrThrow(firstInDoubleBond.getID() +1); - Bond b = firstInDoubleBond.getBondToAtomOrThrow(secondInDoubleBond); - b.setOrder(2); + FragmentTools.unsaturate(firstInDoubleBond, 2, hwRing); + } + deltaEl.detach(); + } + + //add unlocanted heteroatoms + int hetAtomsToProcess = heteroatomsToProcess.size(); + if (hetAtomsToProcess > 0) { + List carbonAtomsInRing = new ArrayList(); + for (Atom atom : atomList) { + if (atom.getElement() == ChemEl.C) { + carbonAtomsInRing.add(atom); + } + } + + if (hetAtomsToProcess> 1 && hetAtomsToProcess < (carbonAtomsInRing.size() -1)) { + Element possibleBenzo = OpsinTools.getPreviousSibling(group, GROUP_EL); + //assume benzo fusions or hwring as a fusion prefix produce unambiguous heteroatom positioning + if (!(possibleBenzo != null && (possibleBenzo.getValue().equals("benz") || possibleBenzo.getValue().equals("benzo")) + || "o".equals(group.getAttributeValue((SUBSEQUENTUNSEMANTICTOKEN_ATR))))) { + state.addIsAmbiguous("Heteroatom positioning in the Hantzsch-Widman name " + name); + } + } + if (hetAtomsToProcess > carbonAtomsInRing.size()) { + throw new StructureBuildingException(hetAtomsToProcess +" heteroatoms were specified for a Hantzsch-Widman ring with only " + carbonAtomsInRing.size() + " atoms"); + } + for (int i = 0; i < hetAtomsToProcess; i++) { + Element heteroatom = heteroatomsToProcess.get(i); + String 
elementReplacement = heteroatom.getAttributeValue(VALUE_ATR); + Matcher m = MATCH_ELEMENT_SYMBOL.matcher(elementReplacement); + if (!m.find()){ + throw new ComponentGenerationException("Failed to extract element from Hantzsch-Widman heteroatom"); + } + elementReplacement = m.group(); + Atom a = carbonAtomsInRing.get(i); + a.setElement(ChemEl.valueOf(elementReplacement)); + if (heteroatom.getAttribute(LAMBDA_ATR)!=null){ + a.setLambdaConventionValency(Integer.parseInt(heteroatom.getAttributeValue(LAMBDA_ATR))); + } + heteroatom.detach(); } - deltas.get(j).detach(); } - XOMTools.setTextChild(group, name); } } @@ -2699,18 +2945,18 @@ * @throws StructureBuildingException */ private void assignElementSymbolLocants(Element subOrRoot) throws StructureBuildingException { - List groups = XOMTools.getChildElementsWithTagName(subOrRoot, GROUP_EL); + List groups = subOrRoot.getChildElements(GROUP_EL); Element lastGroupElementInSubOrRoot =groups.get(groups.size()-1); List suffixFragments = new ArrayList(state.xmlSuffixMap.get(lastGroupElementInSubOrRoot)); - Fragment suffixableFragment =state.xmlFragmentMap.get(lastGroupElementInSubOrRoot); + Fragment suffixableFragment = lastGroupElementInSubOrRoot.getFrag(); //treat conjunctive suffixesas if they were suffixes - List conjunctiveGroups = XOMTools.getChildElementsWithTagName(subOrRoot, CONJUNCTIVESUFFIXGROUP_EL); + List conjunctiveGroups = subOrRoot.getChildElements(CONJUNCTIVESUFFIXGROUP_EL); for (Element group : conjunctiveGroups) { - suffixFragments.add(state.xmlFragmentMap.get(group)); + suffixFragments.add(group.getFrag()); } FragmentTools.assignElementLocants(suffixableFragment, suffixFragments); for (int i = groups.size()-2; i>=0; i--) { - FragmentTools.assignElementLocants(state.xmlFragmentMap.get(groups.get(i)), new ArrayList()); + FragmentTools.assignElementLocants(groups.get(i).getFrag(), new ArrayList()); } } @@ -2722,7 +2968,7 @@ * @throws StructureBuildingException */ private void processRingAssemblies(Element 
subOrRoot) throws ComponentGenerationException, StructureBuildingException { - List ringAssemblyMultipliers = XOMTools.getChildElementsWithTagName(subOrRoot, RINGASSEMBLYMULTIPLIER_EL); + List ringAssemblyMultipliers = subOrRoot.getChildElements(RINGASSEMBLYMULTIPLIER_EL); for (Element multiplier : ringAssemblyMultipliers) { int mvalue = Integer.parseInt(multiplier.getAttributeValue(VALUE_ATR)); @@ -2730,37 +2976,37 @@ * Populate locants with locants. Two locants are required for every pair of rings to be joined. * e.g. bi requires 2, ter requires 4 etc. */ - List> ringJoiningLocants =new ArrayList>(); - Element potentialLocant =(Element)XOMTools.getPreviousSibling(multiplier); - Element group =(Element)XOMTools.getNextSibling(multiplier, GROUP_EL); - if (potentialLocant!=null && (potentialLocant.getLocalName().equals(COLONORSEMICOLONDELIMITEDLOCANT_EL)||potentialLocant.getLocalName().equals(LOCANT_EL)) ){//a locant appears to have been provided to indicate how to connect the rings of the ringAssembly + List> ringJoiningLocants = new ArrayList>(); + Element potentialLocant = OpsinTools.getPreviousSibling(multiplier); + Element group = OpsinTools.getNextSibling(multiplier, GROUP_EL); + if (potentialLocant != null && (potentialLocant.getName().equals(COLONORSEMICOLONDELIMITEDLOCANT_EL) || potentialLocant.getName().equals(LOCANT_EL))){ //a locant appears to have been provided to indicate how to connect the rings of the ringAssembly if (ORTHOMETAPARA_TYPE_VAL.equals(potentialLocant.getAttributeValue(TYPE_ATR))){//an OMP locant has been provided to indicate how to connect the rings of the ringAssembly - String locant2 =potentialLocant.getValue(); - String locant1 ="1"; - ArrayList locantArrayList =new ArrayList(); + String locant2 = potentialLocant.getValue(); + String locant1 = "1"; + List locantArrayList = new ArrayList(); locantArrayList.add("1"); locantArrayList.add("1'"); ringJoiningLocants.add(locantArrayList); - for (int j = 1; j < mvalue -1; j++) { - 
locantArrayList =new ArrayList(); - locantArrayList.add(locant2 + StringTools.multiplyString("'", j)); - locantArrayList.add(locant1 + StringTools.multiplyString("'", j+1)); + for (int i = 1; i < mvalue - 1; i++) { + locantArrayList = new ArrayList(); + locantArrayList.add(locant2 + StringTools.multiplyString("'", i)); + locantArrayList.add(locant1 + StringTools.multiplyString("'", i + 1)); ringJoiningLocants.add(locantArrayList); } potentialLocant.detach(); } else{ - String locantText =StringTools.removeDashIfPresent(potentialLocant.getValue()); + String locantText = StringTools.removeDashIfPresent(potentialLocant.getValue()); //locantText might be something like 1,1':3',1'' String[] perRingLocantArray = MATCH_COLONORSEMICOLON.split(locantText); - if (perRingLocantArray.length !=(mvalue -1)){ - throw new ComponentGenerationException("Disagreement between number of locants(" + locantText +") and ring assembly multiplier: " + mvalue); + if (perRingLocantArray.length != (mvalue - 1)){ + throw new ComponentGenerationException("Disagreement between number of locants(" + locantText + ") and ring assembly multiplier: " + mvalue); } - if (perRingLocantArray.length!=1 || MATCH_COMMA.split(perRingLocantArray[0]).length!=1){//not for the case of a single locant - for (int j = 0; j < perRingLocantArray.length; j++) { - String[] locantArray = MATCH_COMMA.split(perRingLocantArray[j]); - if (locantArray.length !=2){ - throw new ComponentGenerationException("missing locant, expected 2 locants: " + perRingLocantArray[j]); + if (perRingLocantArray.length != 1 || perRingLocantArray[0].split(",").length != 1){//if there is just a single locant it doesn't relate to how the rings are connected + for (String ringLocantArray : perRingLocantArray) { + String[] locantArray = ringLocantArray.split(","); + if (locantArray.length != 2){ + throw new ComponentGenerationException("missing locant, expected 2 locants: " + ringLocantArray); } ringJoiningLocants.add(Arrays.asList(locantArray)); } @@ 
-2769,21 +3015,21 @@ } } - Fragment fragmentToResolveAndDuplicate =state.xmlFragmentMap.get(group); + Fragment fragmentToResolveAndDuplicate = group.getFrag(); Element elementToResolve;//temporary element containing elements that should be resolved before the ring is duplicated - Element nextEl =(Element) XOMTools.getNextSibling(multiplier); - if (nextEl.getLocalName().equals(STRUCTURALOPENBRACKET_EL)){//brackets have been provided to aid disambiguation. These brackets are detached e.g. bi(cyclohexyl) - elementToResolve = new Element(SUBSTITUENT_EL); - Element currentEl =nextEl; - nextEl = (Element) XOMTools.getNextSibling(currentEl); + Element nextEl = OpsinTools.getNextSibling(multiplier); + if (nextEl.getName().equals(STRUCTURALOPENBRACKET_EL)){//brackets have been provided to aid disambiguation. These brackets are detached e.g. bi(cyclohexyl) + elementToResolve = new GroupingEl(SUBSTITUENT_EL); + Element currentEl = nextEl; + nextEl = OpsinTools.getNextSibling(currentEl); currentEl.detach(); - while (nextEl !=null && !nextEl.getLocalName().equals(STRUCTURALCLOSEBRACKET_EL)){ - currentEl =nextEl; - nextEl = (Element) XOMTools.getNextSibling(currentEl); + while (nextEl != null && !nextEl.getName().equals(STRUCTURALCLOSEBRACKET_EL)){ + currentEl = nextEl; + nextEl = OpsinTools.getNextSibling(currentEl); currentEl.detach(); - elementToResolve.appendChild(currentEl); + elementToResolve.addChild(currentEl); } - if (nextEl!=null){ + if (nextEl != null){ nextEl.detach(); } } @@ -2791,72 +3037,96 @@ elementToResolve = determineElementsToResolveIntoRingAssembly(multiplier, ringJoiningLocants.size(), fragmentToResolveAndDuplicate.getOutAtomCount()); } - List suffixes = XOMTools.getChildElementsWithTagName(elementToResolve, SUFFIX_EL); - resolveSuffixes(group, suffixes); - StructureBuildingMethods.resolveLocantedFeatures(state, elementToResolve); - StructureBuildingMethods.resolveUnLocantedFeatures(state, elementToResolve); - group.detach(); - 
XOMTools.insertAfter(multiplier, group); - + List suffixes = elementToResolve.getChildElements(SUFFIX_EL); + suffixApplier.resolveSuffixes(group, suffixes); int bondOrder = 1; - if (fragmentToResolveAndDuplicate.getOutAtomCount()>0){//e.g. bicyclohexanylidene - bondOrder =fragmentToResolveAndDuplicate.getOutAtom(0).getValency(); - } - if (fragmentToResolveAndDuplicate.getOutAtomCount()>1){ + if (fragmentToResolveAndDuplicate.getOutAtomCount() > 1){ throw new StructureBuildingException("Ring assembly fragment should have one or no OutAtoms; not more than one!"); } + if (fragmentToResolveAndDuplicate.getOutAtomCount() == 1) {//e.g. bicyclohexanylidene + bondOrder = fragmentToResolveAndDuplicate.getOutAtom(0).getValency(); + } + + boolean twoRingsJoinedUsingSuffixPosition = ringJoiningLocants.size() == 0 && mvalue == 2 && fragmentToResolveAndDuplicate.getOutAtomCount() == 1; + if (!twoRingsJoinedUsingSuffixPosition && fragmentToResolveAndDuplicate.getOutAtomCount() == 1) { + //remove yl (or the like). Need to make sure that resolveUnLocantedFeatures doesn't consider 2,2'-bipyridyl ambiguous due to the location of the ul + fragmentToResolveAndDuplicate.removeOutAtom(0); + } - List clonedFragments = new ArrayList(); - for (int j = 1; j < mvalue; j++) { - clonedFragments.add(state.fragManager.copyAndRelabelFragment(fragmentToResolveAndDuplicate, j)); - } - for (int j = 0; j < mvalue-1; j++) { - Fragment clone =clonedFragments.get(j); - Atom atomOnParent; - Atom atomOnLatestClone; - if (ringJoiningLocants.size()>0){//locants defined - atomOnParent = fragmentToResolveAndDuplicate.getAtomByLocantOrThrow(ringJoiningLocants.get(j).get(0)); - String secondLocant = ringJoiningLocants.get(j).get(1); - if (mvalue ==2 && !secondLocant.endsWith("'")){ - //Allow prime to be (incorrectly) omitted on second locant in bi ring assemblies e.g. 
2,2-bipyridine - try { - atomOnLatestClone = clone.getAtomByLocantOrThrow(secondLocant); - } - catch (StructureBuildingException e){ - atomOnLatestClone = clone.getAtomByLocant(secondLocant + "'"); - if (atomOnLatestClone == null){ - throw e; + StructureBuildingMethods.resolveLocantedFeatures(state, elementToResolve); + StructureBuildingMethods.resolveUnLocantedFeatures(state, elementToResolve); + + group.detach(); + OpsinTools.insertAfter(multiplier, group); + + if (twoRingsJoinedUsingSuffixPosition){ + Fragment clone = state.fragManager.copyAndRelabelFragment(fragmentToResolveAndDuplicate, 1); + Atom atomOnParent = fragmentToResolveAndDuplicate.getOutAtom(0).getAtom(); + Atom atomOnClone = clone.getOutAtom(0).getAtom(); + fragmentToResolveAndDuplicate.removeOutAtom(0); + clone.removeOutAtom(0); + state.fragManager.incorporateFragment(clone, atomOnClone, fragmentToResolveAndDuplicate, atomOnParent, bondOrder); + } + else { + List clonedFragments = new ArrayList(); + for (int j = 1; j < mvalue; j++) { + clonedFragments.add(state.fragManager.copyAndRelabelFragment(fragmentToResolveAndDuplicate, j)); + } + Fragment lastRingUnlocantedBondedTo = null; + for (int i = 0; i < mvalue - 1; i++) { + Fragment clone = clonedFragments.get(i); + Atom atomOnParent; + Atom atomOnLatestClone; + if (ringJoiningLocants.size() > 0){//locants defined + atomOnParent = fragmentToResolveAndDuplicate.getAtomByLocantOrThrow(ringJoiningLocants.get(i).get(0)); + String secondLocant = ringJoiningLocants.get(i).get(1); + if (mvalue ==2 && !secondLocant.endsWith("'")){ + //Allow prime to be (incorrectly) omitted on second locant in bi ring assemblies e.g. 
2,2-bipyridine + try { + atomOnLatestClone = clone.getAtomByLocantOrThrow(secondLocant); + } + catch (StructureBuildingException e){ + atomOnLatestClone = clone.getAtomByLocant(secondLocant + "'"); + if (atomOnLatestClone == null){ + throw e; + } } } + else{ + atomOnLatestClone = clone.getAtomByLocantOrThrow(secondLocant); + } } else{ - atomOnLatestClone = clone.getAtomByLocantOrThrow(secondLocant); - } - } - else{ - if (fragmentToResolveAndDuplicate.getOutAtomCount()>0 && mvalue==2){ - atomOnParent = fragmentToResolveAndDuplicate.getOutAtom(0).getAtom(); - atomOnLatestClone = clone.getOutAtom(0).getAtom(); - } - else{ - atomOnParent =fragmentToResolveAndDuplicate.getAtomOrNextSuitableAtomOrThrow(fragmentToResolveAndDuplicate.getDefaultInAtom(), bondOrder, true); - atomOnLatestClone = clone.getAtomOrNextSuitableAtomOrThrow(clone.getDefaultInAtom(), bondOrder, true); + List potentialAtomsOnParent; + if (lastRingUnlocantedBondedTo == null){ + potentialAtomsOnParent = FragmentTools.findSubstituableAtoms(fragmentToResolveAndDuplicate, bondOrder); + } + else{ + potentialAtomsOnParent = FragmentTools.findSubstituableAtoms(lastRingUnlocantedBondedTo, bondOrder); + } + List potentialAtomsOnClone = FragmentTools.findSubstituableAtoms(clone, bondOrder); + if (potentialAtomsOnParent.isEmpty() || potentialAtomsOnClone.isEmpty()) { + throw new StructureBuildingException("Unable to find suitable atom for unlocanted ring assembly construction"); + } + if (AmbiguityChecker.isSubstitutionAmbiguous(potentialAtomsOnParent, 1)) { + state.addIsAmbiguous("Choice of atoms to form ring assembly: " + group.getValue()); + } + if (AmbiguityChecker.isSubstitutionAmbiguous(potentialAtomsOnClone, 1)) { + state.addIsAmbiguous("Choice of atoms to form ring assembly: " + group.getValue()); + } + atomOnParent = potentialAtomsOnParent.get(0); + atomOnLatestClone = potentialAtomsOnClone.get(0); + lastRingUnlocantedBondedTo = clone; } + state.fragManager.incorporateFragment(clone, atomOnLatestClone, 
fragmentToResolveAndDuplicate, atomOnParent, bondOrder); } - if (fragmentToResolveAndDuplicate.getOutAtomCount()>0){ - fragmentToResolveAndDuplicate.removeOutAtom(0); - } - if (clone.getOutAtomCount()>0){ - clone.removeOutAtom(0); - } - state.fragManager.incorporateFragment(clone, atomOnLatestClone, fragmentToResolveAndDuplicate, atomOnParent, bondOrder); - fragmentToResolveAndDuplicate.setDefaultInAtom(clone.getDefaultInAtom()); } - XOMTools.setTextChild(group, multiplier.getValue() +group.getValue()); - Element possibleOpenStructuralBracket = (Element) XOMTools.getPreviousSibling(multiplier); - if (possibleOpenStructuralBracket!=null && possibleOpenStructuralBracket.getLocalName().equals(STRUCTURALOPENBRACKET_EL)){//e.g. [2,2'-bipyridin]. + + group.setValue(multiplier.getValue() + group.getValue()); + Element possibleOpenStructuralBracket = OpsinTools.getPreviousSibling(multiplier); + if (possibleOpenStructuralBracket!=null && possibleOpenStructuralBracket.getName().equals(STRUCTURALOPENBRACKET_EL)){//e.g. [2,2'-bipyridin]. //To emphasise there can actually be two sets of structural brackets e.g. 
[1,1'-bi(cyclohexyl)] - XOMTools.getNextSibling(possibleOpenStructuralBracket, STRUCTURALCLOSEBRACKET_EL).detach(); + OpsinTools.getNextSibling(possibleOpenStructuralBracket, STRUCTURALCLOSEBRACKET_EL).detach(); possibleOpenStructuralBracket.detach(); } multiplier.detach(); @@ -2865,45 +3135,58 @@ /** * Given the element after the ring assembly multiplier determines which siblings should be resolved by adding them to elementToResolve - * @param ringJoiningLocants - * @param elementAfterMultiplier - * @param elementToResolve - * @return - * @throws ComponentGenerationException + * @param multiplier + * @param ringJoiningLocants + * @param outAtomCount + * @return + * @throws ComponentGenerationException */ private Element determineElementsToResolveIntoRingAssembly(Element multiplier, int ringJoiningLocants, int outAtomCount) throws ComponentGenerationException { - Element elementToResolve = new Element(SUBSTITUENT_EL); + Element elementToResolve = new GroupingEl(SUBSTITUENT_EL); boolean groupFound = false; boolean inlineSuffixSeen = outAtomCount > 0; - Element currentEl = (Element) XOMTools.getNextSibling(multiplier); - while (currentEl !=null){ - Element nextEl = (Element) XOMTools.getNextSibling(currentEl); - if (!groupFound || currentEl.getLocalName().equals(SUFFIX_EL) && currentEl.getAttributeValue(TYPE_ATR).equals(CHARGE_TYPE_VAL)|| currentEl.getLocalName().equals(UNSATURATOR_EL)){ + Element currentEl = OpsinTools.getNextSibling(multiplier); + while (currentEl != null) { + //Attach all until group found + //Attach unlocanted charge suffixes/unsaturation + //Attach one unlocanted unmultiplied inline suffix (or one locanted unmultiplied inline suffix if it is a bi ring assembly e.g. 
bipyridin-2-yl) + Element nextEl = OpsinTools.getNextSibling(currentEl); + if (!groupFound) { currentEl.detach(); - elementToResolve.appendChild(currentEl); - } - else if (currentEl.getLocalName().equals(SUFFIX_EL)){ - state.xmlFragmentMap.get(currentEl); - if (!inlineSuffixSeen && currentEl.getAttributeValue(TYPE_ATR).equals(INLINE_TYPE_VAL) && currentEl.getAttributeValue(MULTIPLIED_ATR) ==null - && (currentEl.getAttribute(LOCANT_ATR)==null || ("2".equals(multiplier.getAttributeValue(VALUE_ATR)) && ringJoiningLocants==0)) && state.xmlFragmentMap.get(currentEl)==null){ - inlineSuffixSeen = true; + elementToResolve.addChild(currentEl); + if (currentEl.getName().equals(GROUP_EL)) { + groupFound = true; + } + } + else { + if (currentEl.getName().equals(SUFFIX_EL)) { + String suffixType = currentEl.getAttributeValue(TYPE_ATR); + if (suffixType.equals(CHARGE_TYPE_VAL) && currentEl.getAttribute(LOCANT_ATR) == null) { + currentEl.detach(); + elementToResolve.addChild(currentEl); + } + else if (!inlineSuffixSeen && suffixType.equals(INLINE_TYPE_VAL) && currentEl.getAttributeValue(MULTIPLIED_ATR) == null + && (currentEl.getAttribute(LOCANT_ATR) == null || ("2".equals(multiplier.getAttributeValue(VALUE_ATR)) && ringJoiningLocants == 0)) && currentEl.getFrag() == null){ + inlineSuffixSeen = true; + currentEl.detach(); + elementToResolve.addChild(currentEl); + } + else { + break; + } + } + else if (currentEl.getName().equals(UNSATURATOR_EL) && currentEl.getAttribute(LOCANT_ATR) == null) { currentEl.detach(); - elementToResolve.appendChild(currentEl); + elementToResolve.addChild(currentEl); } - else{ + else { break; } } - else{ - break; - } - if (currentEl.getLocalName().equals(GROUP_EL)){ - groupFound = true; - } currentEl = nextEl; } - Element parent = (Element) multiplier.getParent(); - if (!parent.getLocalName().equals(SUBSTITUENT_EL) && XOMTools.getChildElementsWithTagNameAndAttribute(parent, SUFFIX_EL, TYPE_ATR, INLINE_TYPE_VAL).size()!=0){ + Element parent = 
multiplier.getParent(); + if (!parent.getName().equals(SUBSTITUENT_EL) && OpsinTools.getChildElementsWithTagNameAndAttribute(parent, SUFFIX_EL, TYPE_ATR, INLINE_TYPE_VAL).size() > 0){ throw new ComponentGenerationException("Unexpected radical adding suffix on ring assembly"); } return elementToResolve; @@ -2920,7 +3203,7 @@ * @throws StructureBuildingException */ private void processPolyCyclicSpiroNomenclature(Element subOrRoot) throws ComponentGenerationException, StructureBuildingException { - List polyCyclicSpiros = XOMTools.getChildElementsWithTagName(subOrRoot, POLYCYCLICSPIRO_EL); + List polyCyclicSpiros = subOrRoot.getChildElements(POLYCYCLICSPIRO_EL); if (polyCyclicSpiros.size()>0){ Element polyCyclicSpiroDescriptor = polyCyclicSpiros.get(0); String value = polyCyclicSpiroDescriptor.getAttributeValue(VALUE_ATR); @@ -2959,100 +3242,149 @@ } private void processNonIdenticalPolyCyclicSpiro(Element polyCyclicSpiroDescriptor) throws ComponentGenerationException, StructureBuildingException { - Element subOrRoot = (Element) polyCyclicSpiroDescriptor.getParent(); - List groups = XOMTools.getChildElementsWithTagName(subOrRoot, GROUP_EL); - if (groups.size()<2){ - throw new ComponentGenerationException("OPSIN Bug: Atleast two groups were expected in polycyclic spiro system"); - } - Element openBracket = (Element) XOMTools.getNextSibling(polyCyclicSpiroDescriptor); - if (!openBracket.getLocalName().equals(STRUCTURALOPENBRACKET_EL)){ + Element subOrRoot = polyCyclicSpiroDescriptor.getParent(); + Element openBracket = OpsinTools.getNextSibling(polyCyclicSpiroDescriptor); + if (!openBracket.getName().equals(STRUCTURALOPENBRACKET_EL)){ throw new ComponentGenerationException("OPSIN Bug: Open bracket not found where open bracket expeced"); } - List spiroBracketElements = XOMTools.getSiblingsUpToElementWithTagName(openBracket, STRUCTURALCLOSEBRACKET_EL); - Element closeBracket = (Element) XOMTools.getNextSibling(spiroBracketElements.get(spiroBracketElements.size()-1)); - if 
(closeBracket == null || !closeBracket.getLocalName().equals(STRUCTURALCLOSEBRACKET_EL)){ + List spiroBracketElements = OpsinTools.getSiblingsUpToElementWithTagName(openBracket, STRUCTURALCLOSEBRACKET_EL); + Element closeBracket = OpsinTools.getNextSibling(spiroBracketElements.get(spiroBracketElements.size() - 1)); + if (closeBracket == null || !closeBracket.getName().equals(STRUCTURALCLOSEBRACKET_EL)){ throw new ComponentGenerationException("OPSIN Bug: Open bracket not found where open bracket expeced"); } + List groups = new ArrayList(); + for (Element spiroBracketElement : spiroBracketElements) { + String name = spiroBracketElement.getName(); + if (name.equals(GROUP_EL)) { + groups.add(spiroBracketElement); + } + else if (name.equals(SPIROLOCANT_EL)) { + Element spiroLocant = spiroBracketElement; + String[] locants = StringTools.removeDashIfPresent(spiroLocant.getValue()).split(","); + if (locants.length != 2) { + throw new ComponentGenerationException("Incorrect number of locants found before component of polycyclic spiro system"); + } + boolean changed = false; + Matcher m1 = matchAddedHydrogenBracket.matcher(locants[0]); + if (m1.find()) { + Element addedHydrogenElement = new TokenEl(ADDEDHYDROGEN_EL); + String addedHydrogenLocant = m1.group(1); + int primes = StringTools.countTerminalPrimes(addedHydrogenLocant); + if (primes > 0 && primes == (groups.size() - 1)) {//rings are primeless before spiro fusion (hydrogen is currently added before spiro fusion) + addedHydrogenLocant = addedHydrogenLocant.substring(0, addedHydrogenLocant.length() - primes); + } + addedHydrogenElement.addAttribute(new Attribute(LOCANT_ATR, addedHydrogenLocant)); + OpsinTools.insertBefore(spiroLocant, addedHydrogenElement); + locants[0] = m1.replaceAll(""); + changed = true; + } + Matcher m2 = matchAddedHydrogenBracket.matcher(locants[1]); + if (m2.find()) { + Element addedHydrogenElement = new TokenEl(ADDEDHYDROGEN_EL); + String addedHydrogenLocant = m2.group(1); + int primes = 
StringTools.countTerminalPrimes(addedHydrogenLocant); + if (primes > 0 && primes == groups.size()) {//rings are primeless before spiro fusion (hydrogen is currently added before spiro fusion) + addedHydrogenLocant = addedHydrogenLocant.substring(0, addedHydrogenLocant.length() - primes); + } + addedHydrogenElement.addAttribute(new Attribute(LOCANT_ATR, addedHydrogenLocant)); + OpsinTools.insertAfter(spiroLocant, addedHydrogenElement); + locants[1] = m2.replaceAll(""); + changed = true; + } + if (changed) { + spiroLocant.addAttribute(new Attribute(TYPE_ATR, ADDEDHYDROGENLOCANT_TYPE_VAL)); + } + spiroLocant.setValue(StringTools.arrayToString(locants, ",")); + } + } + int groupCount = groups.size(); + if (groupCount < 2) { + throw new ComponentGenerationException("OPSIN Bug: Atleast two groups were expected in polycyclic spiro system"); + } + Element firstGroup = groups.get(0); List firstGroupEls = new ArrayList(); int indexOfOpenBracket = subOrRoot.indexOf(openBracket); - int indexOfFirstGroup = subOrRoot.indexOf(firstGroup); - for (int i =indexOfOpenBracket +1; i < indexOfFirstGroup; i++) { - firstGroupEls.add((Element) subOrRoot.getChild(i)); + Element firstSpiroLocant = OpsinTools.getNextSibling(firstGroup, SPIROLOCANT_EL); + if (firstSpiroLocant == null) { + throw new ComponentGenerationException("Unable to find spiroLocant for polycyclic spiro system"); + } + int indexOfFirstSpiroLocant = subOrRoot.indexOf(firstSpiroLocant); + for (int i = indexOfOpenBracket + 1; i < indexOfFirstSpiroLocant; i++) { + firstGroupEls.add(subOrRoot.getChild(i)); } - firstGroupEls.add(firstGroup); - firstGroupEls.addAll(XOMTools.getNextAdjacentSiblingsOfType(firstGroup, UNSATURATOR_EL)); resolveFeaturesOntoGroup(firstGroupEls); Set spiroAtoms = new HashSet(); - for (int i = 1; i < groups.size(); i++) { - Element nextGroup =groups.get(i); - Element locant = (Element) XOMTools.getNextSibling(groups.get(i-1), SPIROLOCANT_EL); - if (locant ==null){ - throw new 
ComponentGenerationException("Unable to find locantEl for polycyclic spiro system"); + for (int i = 1; i < groupCount; i++) { + Element nextGroup = groups.get(i); + Element spiroLocant = OpsinTools.getNextSibling(groups.get(i - 1), SPIROLOCANT_EL); + if (spiroLocant == null) { + throw new ComponentGenerationException("Unable to find spiroLocant for polycyclic spiro system"); } - + String[] locants = spiroLocant.getValue().split(","); + List nextGroupEls = new ArrayList(); - int indexOfLocant = subOrRoot.indexOf(locant); - int indexOfNextGroup = subOrRoot.indexOf(nextGroup); - for (int j =indexOfLocant +1; j < indexOfNextGroup; j++) { - nextGroupEls.add((Element) subOrRoot.getChild(j)); + int indexOfLocant = subOrRoot.indexOf(spiroLocant); + int indexOfNextSpiroLocantOrEndOfSpiro = subOrRoot.indexOf(i + 1 < groupCount ? OpsinTools.getNextSibling(nextGroup, SPIROLOCANT_EL) : OpsinTools.getNextSibling(nextGroup, STRUCTURALCLOSEBRACKET_EL)); + for (int j = indexOfLocant + 1; j < indexOfNextSpiroLocantOrEndOfSpiro; j++) { + nextGroupEls.add(subOrRoot.getChild(j)); } - nextGroupEls.add(nextGroup); - nextGroupEls.addAll(XOMTools.getNextAdjacentSiblingsOfType(nextGroup, UNSATURATOR_EL)); resolveFeaturesOntoGroup(nextGroupEls); - - String[] locants = MATCH_COMMA.split(StringTools.removeDashIfPresent(locant.getValue())); - if (locants.length!=2){ - throw new ComponentGenerationException("Incorrect number of locants found before component of polycyclic spiro system"); - } - for (int j = 0; j < locants.length; j++) { - String locantText= locants[j]; - Matcher m = matchAddedHydrogenBracket.matcher(locantText); - if (m.find()){ - Element addedHydrogenElement=new Element(ADDEDHYDROGEN_EL); - addedHydrogenElement.addAttribute(new Attribute(LOCANT_ATR, m.group(1))); - XOMTools.insertBefore(locant, addedHydrogenElement); - locant.addAttribute(new Attribute(TYPE_ATR, ADDEDHYDROGENLOCANT_TYPE_VAL)); - locants[j] = m.replaceAll(""); - } - } - locant.detach(); - Fragment nextFragment = 
state.xmlFragmentMap.get(nextGroup); + + spiroLocant.detach(); + Fragment nextFragment = nextGroup.getFrag(); FragmentTools.relabelNumericLocants(nextFragment.getAtomList(), StringTools.multiplyString("'", i)); - Atom atomToBeReplaced = nextFragment.getAtomByLocantOrThrow(locants[1]); - Atom atomOnParentFrag = null; + String secondLocant = locants[1]; + Atom atomOnNextFragment; + if (secondLocant.endsWith("'")){ + atomOnNextFragment = nextFragment.getAtomByLocantOrThrow(locants[1]); + } + else{ + //for simple spiro fusions the prime is often forgotten + atomOnNextFragment = nextFragment.getAtomByLocantOrThrow(locants[1] + "'"); + } + Atom atomToBeReplaced = null; for (int j = 0; j < i; j++) { - atomOnParentFrag = state.xmlFragmentMap.get(groups.get(j)).getAtomByLocant(locants[0]); - if (atomOnParentFrag!=null){ + atomToBeReplaced = groups.get(j).getFrag().getAtomByLocant(locants[0]); + if (atomToBeReplaced != null){ break; } } - if (atomOnParentFrag==null){ + if (atomToBeReplaced == null){ throw new ComponentGenerationException("Could not find the atom with locant " + locants[0] +" for use in polycyclic spiro system"); } - spiroAtoms.add(atomOnParentFrag); - state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToBeReplaced, atomOnParentFrag); + spiroAtoms.add(atomToBeReplaced); + if (atomToBeReplaced.getElement() != atomOnNextFragment.getElement()){ + //In well formed names these should be identical but by special case pick the heteroatom if the other is carbon + if (atomToBeReplaced.getElement() != ChemEl.C && atomOnNextFragment.getElement() == ChemEl.C) { + atomOnNextFragment.setElement(atomToBeReplaced.getElement()); + } + else if (atomToBeReplaced.getElement() != ChemEl.C && atomOnNextFragment.getElement() != ChemEl.C) { + throw new ComponentGenerationException("Disagreement between which element the spiro atom should be: " + atomToBeReplaced.getElement() +" and " + atomOnNextFragment.getElement() ); + } + } if 
(atomToBeReplaced.hasSpareValency()){ - atomOnParentFrag.setSpareValency(true); + atomOnNextFragment.setSpareValency(true); } + state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToBeReplaced, atomOnNextFragment); } - if (spiroAtoms.size()>1){ - Element expectedMultiplier = (Element) XOMTools.getPreviousSibling(polyCyclicSpiroDescriptor); - if (expectedMultiplier!=null && expectedMultiplier.getLocalName().equals(MULTIPLIER_EL) && Integer.parseInt(expectedMultiplier.getAttributeValue(VALUE_ATR))==spiroAtoms.size()){ + if (spiroAtoms.size() > 1) { + Element expectedMultiplier = OpsinTools.getPreviousSibling(polyCyclicSpiroDescriptor); + if (expectedMultiplier != null && expectedMultiplier.getName().equals(MULTIPLIER_EL) && Integer.parseInt(expectedMultiplier.getAttributeValue(VALUE_ATR)) == spiroAtoms.size()) { expectedMultiplier.detach(); } } - Element rootGroup = groups.get(groups.size()-1); - Fragment rootFrag = state.xmlFragmentMap.get(rootGroup); + Element rootGroup = groups.get(groupCount - 1); + Fragment rootFrag = rootGroup.getFrag(); String name = rootGroup.getValue(); - for (int i = 0; i < groups.size() -1; i++) { - Element group =groups.get(i); - state.fragManager.incorporateFragment(state.xmlFragmentMap.get(group), rootFrag); + for (int i = 0; i < groupCount - 1; i++) { + Element group = groups.get(i); + state.fragManager.incorporateFragment(group.getFrag(), rootFrag); name = group.getValue() + name; group.detach(); } - XOMTools.setTextChild(rootGroup, polyCyclicSpiroDescriptor.getValue() + name); + rootGroup.setValue(polyCyclicSpiroDescriptor.getValue() + name); openBracket.detach(); closeBracket.detach(); } @@ -3066,32 +3398,32 @@ */ private void processOldMethodPolyCyclicSpiro(List spiroElements) throws ComponentGenerationException, StructureBuildingException { Element firstSpiro =spiroElements.get(0); - Element subOrRoot = (Element) firstSpiro.getParent(); - Element firstEl = (Element) subOrRoot.getChild(0); - List 
elementsToResolve = XOMTools.getSiblingsUpToElementWithTagName(firstEl, POLYCYCLICSPIRO_EL); + Element subOrRoot = firstSpiro.getParent(); + Element firstEl = subOrRoot.getChild(0); + List elementsToResolve = OpsinTools.getSiblingsUpToElementWithTagName(firstEl, POLYCYCLICSPIRO_EL); elementsToResolve.add(0, firstEl); resolveFeaturesOntoGroup(elementsToResolve); for (int i = 0; i < spiroElements.size(); i++) { Element currentSpiro = spiroElements.get(i); - Element previousGroup = (Element) XOMTools.getPreviousSibling(currentSpiro, GROUP_EL); + Element previousGroup = OpsinTools.getPreviousSibling(currentSpiro, GROUP_EL); if (previousGroup==null){ throw new ComponentGenerationException("OPSIN bug: unable to locate group before polycylic spiro descriptor"); } - Element nextGroup = (Element) XOMTools.getNextSibling(currentSpiro, GROUP_EL); + Element nextGroup = OpsinTools.getNextSibling(currentSpiro, GROUP_EL); if (nextGroup==null){ throw new ComponentGenerationException("OPSIN bug: unable to locate group after polycylic spiro descriptor"); } - Fragment parentFrag = state.xmlFragmentMap.get(nextGroup); - Fragment previousFrag = state.xmlFragmentMap.get(previousGroup); + Fragment previousFrag = previousGroup.getFrag(); + Fragment parentFrag = nextGroup.getFrag(); FragmentTools.relabelNumericLocants(parentFrag.getAtomList(), StringTools.multiplyString("'",i+1)); - elementsToResolve = XOMTools.getSiblingsUpToElementWithTagName(currentSpiro, POLYCYCLICSPIRO_EL); + elementsToResolve = OpsinTools.getSiblingsUpToElementWithTagName(currentSpiro, POLYCYCLICSPIRO_EL); resolveFeaturesOntoGroup(elementsToResolve); String locant1 =null; - Element possibleFirstLocant = (Element) XOMTools.getPreviousSibling(currentSpiro); - if (possibleFirstLocant !=null && possibleFirstLocant.getLocalName().equals(LOCANT_EL)){ - if (MATCH_COMMA.split(possibleFirstLocant.getValue()).length==1){ + Element possibleFirstLocant = OpsinTools.getPreviousSibling(currentSpiro); + if (possibleFirstLocant 
!=null && possibleFirstLocant.getName().equals(LOCANT_EL)){ + if (possibleFirstLocant.getValue().split(",").length==1){ locant1 = possibleFirstLocant.getValue(); possibleFirstLocant.detach(); } @@ -3104,13 +3436,20 @@ atomToBeReplaced = previousFrag.getAtomByLocantOrThrow(locant1); } else{ - atomToBeReplaced = previousFrag.getAtomOrNextSuitableAtomOrThrow(previousFrag.getFirstAtom(), 2, true); + List potentialAtoms = FragmentTools.findSubstituableAtoms(previousFrag, 2); + if (potentialAtoms.isEmpty()) { + throw new StructureBuildingException("No suitable atom found for spiro fusion"); + } + if (AmbiguityChecker.isSubstitutionAmbiguous(potentialAtoms, 1)) { + state.addIsAmbiguous("Choice of atom for spiro fusion on: " + previousGroup.getValue()); + } + atomToBeReplaced = potentialAtoms.get(0); } Atom atomOnParentFrag; String locant2 =null; - Element possibleSecondLocant = (Element) XOMTools.getNextSibling(currentSpiro); - if (possibleSecondLocant !=null && possibleSecondLocant.getLocalName().equals(LOCANT_EL)){ - if (MATCH_COMMA.split(possibleSecondLocant.getValue()).length==1){ + Element possibleSecondLocant = OpsinTools.getNextSibling(currentSpiro); + if (possibleSecondLocant !=null && possibleSecondLocant.getName().equals(LOCANT_EL)){ + if (possibleSecondLocant.getValue().split(",").length==1){ locant2 = possibleSecondLocant.getValue(); possibleSecondLocant.detach(); } @@ -3118,11 +3457,18 @@ throw new ComponentGenerationException("Malformed locant after polycyclic spiro descriptor"); } } - if (locant2!=null){ + if (locant2 != null){ atomOnParentFrag = parentFrag.getAtomByLocantOrThrow(locant2); } else{ - atomOnParentFrag = parentFrag.getAtomOrNextSuitableAtomOrThrow(parentFrag.getFirstAtom(), 2, true); + List potentialAtoms = FragmentTools.findSubstituableAtoms(parentFrag, 2); + if (potentialAtoms.isEmpty()) { + throw new StructureBuildingException("No suitable atom found for spiro fusion"); + } + if (AmbiguityChecker.isSubstitutionAmbiguous(potentialAtoms, 1)) 
{ + state.addIsAmbiguous("Choice of atom for spiro fusion on: " + nextGroup.getValue()); + }; + atomOnParentFrag = potentialAtoms.get(0); } state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToBeReplaced, atomOnParentFrag); if (atomToBeReplaced.hasSpareValency()){ @@ -3133,7 +3479,7 @@ atomOnParentFrag.setProtonsExplicitlyAddedOrRemoved(atomToBeReplaced.getProtonsExplicitlyAddedOrRemoved()); } state.fragManager.incorporateFragment(previousFrag, parentFrag); - XOMTools.setTextChild(nextGroup, previousGroup.getValue() + currentSpiro.getValue() + nextGroup.getValue()); + nextGroup.setValue(previousGroup.getValue() + currentSpiro.getValue() + nextGroup.getValue()); previousGroup.detach(); } } @@ -3147,9 +3493,9 @@ * @throws StructureBuildingException */ private void processSpiroBiOrTer(Element polyCyclicSpiroDescriptor, int components) throws ComponentGenerationException, StructureBuildingException { - Element locant = (Element) XOMTools.getPreviousSibling(polyCyclicSpiroDescriptor); + Element locant = OpsinTools.getPreviousSibling(polyCyclicSpiroDescriptor); String locantText; - if (locant ==null || !locant.getLocalName().equals(LOCANT_EL)){ + if (locant ==null || !locant.getName().equals(LOCANT_EL)){ if (components==2){ locantText ="1,1'"; } @@ -3161,34 +3507,52 @@ locantText = locant.getValue(); locant.detach(); } - String[] locants = MATCH_COMMA.split(locantText); + String[] locants = locantText.split(","); if (locants.length!=components){ throw new ComponentGenerationException("Mismatch between spiro descriptor and number of locants provided"); } - Element group = (Element) XOMTools.getNextSibling(polyCyclicSpiroDescriptor, GROUP_EL); + Element group = OpsinTools.getNextSibling(polyCyclicSpiroDescriptor, GROUP_EL); if (group==null){ throw new ComponentGenerationException("Cannot find group to which spirobi/ter descriptor applies"); } determineFeaturesToResolveInSingleComponentSpiro(polyCyclicSpiroDescriptor); - Fragment fragment = 
state.xmlFragmentMap.get(group); + Fragment fragment = group.getFrag(); List clones = new ArrayList(); for (int i = 1; i < components ; i++) { clones.add(state.fragManager.copyAndRelabelFragment(fragment, i)); } - for (Fragment clone : clones) { - state.fragManager.incorporateFragment(clone, fragment); - } Atom atomOnOriginalFragment = fragment.getAtomByLocantOrThrow(locants[0]); for (int i = 1; i < components ; i++) { - Atom atomToBeReplaced = fragment.getAtomByLocantOrThrow(locants[i]); + Fragment clone = clones.get(i - 1); + Atom atomToBeReplaced; + if (components ==2 && !locants[i].endsWith("'")){ + //Allow prime to be (incorrectly) omitted on second locant in spirobi + try { + atomToBeReplaced = clone.getAtomByLocantOrThrow(locants[i]); + } + catch (StructureBuildingException e){ + atomToBeReplaced = clone.getAtomByLocant(locants[i] + "'"); + if (atomToBeReplaced == null){ + throw e; + } + } + } + else{ + atomToBeReplaced = clone.getAtomByLocantOrThrow(locants[i]); + } + state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToBeReplaced, atomOnOriginalFragment); if (atomToBeReplaced.hasSpareValency()){ atomOnOriginalFragment.setSpareValency(true); } } - XOMTools.setTextChild(group, polyCyclicSpiroDescriptor.getValue() + group.getValue()); + for (Fragment clone : clones) { + state.fragManager.incorporateFragment(clone, fragment); + } + + group.setValue(polyCyclicSpiroDescriptor.getValue() + group.getValue()); } /** @@ -3201,13 +3565,13 @@ String value = polyCyclicSpiroDescriptor.getValue(); value = value.substring(0, value.length()-10);//remove dispiroter value = StringTools.removeDashIfPresent(value); - String[] locants = MATCH_COLON.split(value); - Element group = (Element) XOMTools.getNextSibling(polyCyclicSpiroDescriptor, GROUP_EL); + String[] locants = value.split(":"); + Element group = OpsinTools.getNextSibling(polyCyclicSpiroDescriptor, GROUP_EL); if (group==null){ throw new ComponentGenerationException("Cannot find group to which 
dispiroter descriptor applies"); } determineFeaturesToResolveInSingleComponentSpiro(polyCyclicSpiroDescriptor); - Fragment fragment = state.xmlFragmentMap.get(group); + Fragment fragment = group.getFrag(); List clones = new ArrayList(); for (int i = 1; i < 3 ; i++) { clones.add(state.fragManager.copyAndRelabelFragment(fragment, i)); @@ -3216,21 +3580,21 @@ state.fragManager.incorporateFragment(clone, fragment); } - Atom atomOnLessPrimedFragment = fragment.getAtomByLocantOrThrow(MATCH_COMMA.split(locants[0])[0]); - Atom atomToBeReplaced = fragment.getAtomByLocantOrThrow(MATCH_COMMA.split(locants[0])[1]); + Atom atomOnLessPrimedFragment = fragment.getAtomByLocantOrThrow(locants[0].split(",")[0]); + Atom atomToBeReplaced = fragment.getAtomByLocantOrThrow(locants[0].split(",")[1]); state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToBeReplaced, atomOnLessPrimedFragment); if (atomToBeReplaced.hasSpareValency()){ atomOnLessPrimedFragment.setSpareValency(true); } - atomOnLessPrimedFragment = fragment.getAtomByLocantOrThrow(MATCH_COMMA.split(locants[1])[0]); - atomToBeReplaced = fragment.getAtomByLocantOrThrow(MATCH_COMMA.split(locants[1])[1]); + atomOnLessPrimedFragment = fragment.getAtomByLocantOrThrow(locants[1].split(",")[0]); + atomToBeReplaced = fragment.getAtomByLocantOrThrow(locants[1].split(",")[1]); state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(atomToBeReplaced, atomOnLessPrimedFragment); if (atomToBeReplaced.hasSpareValency()){ atomOnLessPrimedFragment.setSpareValency(true); } - XOMTools.setTextChild(group, "dispiroter" + group.getValue()); + group.setValue("dispiroter" + group.getValue()); } /** @@ -3241,15 +3605,15 @@ * @throws ComponentGenerationException */ private void determineFeaturesToResolveInSingleComponentSpiro(Element polyCyclicSpiroDescriptor) throws StructureBuildingException, ComponentGenerationException { - Element possibleOpenBracket = (Element) XOMTools.getNextSibling(polyCyclicSpiroDescriptor); + 
Element possibleOpenBracket = OpsinTools.getNextSibling(polyCyclicSpiroDescriptor); List elementsToResolve; - if (possibleOpenBracket.getLocalName().equals(STRUCTURALOPENBRACKET_EL)){ + if (possibleOpenBracket.getName().equals(STRUCTURALOPENBRACKET_EL)){ possibleOpenBracket.detach(); - elementsToResolve = XOMTools.getSiblingsUpToElementWithTagName(polyCyclicSpiroDescriptor, STRUCTURALCLOSEBRACKET_EL); - XOMTools.getNextSibling(elementsToResolve.get(elementsToResolve.size()-1)).detach();//detach close bracket + elementsToResolve = OpsinTools.getSiblingsUpToElementWithTagName(polyCyclicSpiroDescriptor, STRUCTURALCLOSEBRACKET_EL); + OpsinTools.getNextSibling(elementsToResolve.get(elementsToResolve.size()-1)).detach();//detach close bracket } else{ - elementsToResolve = XOMTools.getSiblingsUpToElementWithTagName(polyCyclicSpiroDescriptor, GROUP_EL); + elementsToResolve = OpsinTools.getSiblingsUpToElementWithTagName(polyCyclicSpiroDescriptor, GROUP_EL); } resolveFeaturesOntoGroup(elementsToResolve); } @@ -3265,14 +3629,14 @@ if (elementsToResolve.size()==0){ return; } - Element substituentToResolve = new Element(SUBSTITUENT_EL);//temporary element containing elements that should be resolved before the ring is cloned - Element parent = (Element) elementsToResolve.get(0).getParent(); + Element substituentToResolve = new GroupingEl(SUBSTITUENT_EL);//temporary element containing elements that should be resolved before the ring is cloned + Element parent = elementsToResolve.get(0).getParent(); int index = parent.indexOf(elementsToResolve.get(0)); Element group =null; List suffixes = new ArrayList(); Element locant =null; for (Element element : elementsToResolve) { - String elName =element.getLocalName(); + String elName =element.getName(); if (elName.equals(GROUP_EL)){ group = element; } @@ -3283,14 +3647,14 @@ locant = element; } element.detach(); - substituentToResolve.appendChild(element); + substituentToResolve.addChild(element); } if (group ==null){ throw new 
ComponentGenerationException("OPSIN bug: group element should of been given to method"); } if (locant !=null){//locant is probably an indirect locant, try and assign it List locantAble = findElementsMissingIndirectLocants(substituentToResolve, locant); - String[] locantValues = MATCH_COMMA.split(locant.getValue()); + String[] locantValues = locant.getValue().split(","); if (locantAble.size() >= locantValues.length){ for (int i = 0; i < locantValues.length; i++) { String locantValue = locantValues[i]; @@ -3300,15 +3664,15 @@ } } if (!suffixes.isEmpty()){ - resolveSuffixes(group, suffixes); + suffixApplier.resolveSuffixes(group, suffixes); for (Element suffix : suffixes) { suffix.detach(); } } - if (substituentToResolve.getChildElements().size()!=0){ + if (substituentToResolve.getChildCount() != 0){ StructureBuildingMethods.resolveLocantedFeatures(state, substituentToResolve); StructureBuildingMethods.resolveUnLocantedFeatures(state, substituentToResolve); - Elements children = substituentToResolve.getChildElements(); + List children = substituentToResolve.getChildElements(); for (int i = children.size() -1; i>=0; i--) { Element child = children.get(i); child.detach(); @@ -3316,41 +3680,152 @@ } } } + + private static class SortBridgesByHighestLocantedBridgehead implements Comparator{ + private final Map bridgeToRingAtoms; + SortBridgesByHighestLocantedBridgehead(Map bridgeToRingAtoms) { + this.bridgeToRingAtoms = bridgeToRingAtoms; + } + + public int compare(Fragment bridge1, Fragment bridge2) { + Atom[] ringAtoms1 = bridgeToRingAtoms.get(bridge1); + int bridge1HighestRingLocant = Math.max(getLocantNumber(ringAtoms1[0]),getLocantNumber(ringAtoms1[1])); + + Atom[] ringAtoms2 = bridgeToRingAtoms.get(bridge2); + int bridge2HighestRingLocant = Math.max(getLocantNumber(ringAtoms2[0]),getLocantNumber(ringAtoms2[1])); + + if (bridge1HighestRingLocant > bridge2HighestRingLocant){ + return -1; + } + if (bridge1HighestRingLocant < bridge2HighestRingLocant){ + return 1; + } + 
return 0; + } + + } /** * Processes bridges e.g. 4,7-methanoindene * Resolves and attaches said bridges to the adjacent ring fragment + * Numbers the bridges in accordance with FR-8.6/FR-8.7 * @param subOrRoot * @throws StructureBuildingException */ private void processFusedRingBridges(Element subOrRoot) throws StructureBuildingException { - List bridges = XOMTools.getChildElementsWithTagName(subOrRoot, FUSEDRINGBRIDGE_EL); + List bridges = subOrRoot.getChildElements(FUSEDRINGBRIDGE_EL); + int bridgeCount = bridges.size(); + if (bridgeCount == 0) { + return; + } + Element groupEl = OpsinTools.getNextSibling(bridges.get(bridgeCount - 1), GROUP_EL); + Fragment ringFrag = groupEl.getFrag(); + Map bridgeToRingAtoms = new LinkedHashMap(); for (Element bridge : bridges) { - Fragment ringFrag = state.xmlFragmentMap.get(XOMTools.getNextSibling(bridge, GROUP_EL)); - Fragment bridgeFrag =state.fragManager.buildSMILES(bridge.getAttributeValue(VALUE_ATR), ringFrag.getType(), ringFrag.getSubType(), NONE_LABELS_VAL);//TODO label bridges - - List bridgeAtomList =bridgeFrag.getAtomList(); - bridgeFrag.addOutAtom(bridgeAtomList.get(0), 1, true); - bridgeFrag.addOutAtom(bridgeAtomList.get(bridgeAtomList.size()-1), 1, true); - Element possibleLocant = (Element) XOMTools.getPreviousSibling(bridge); - if (possibleLocant !=null && possibleLocant.getLocalName().equals(LOCANT_EL)){ - String[] locantArray = MATCH_COMMA.split(possibleLocant.getValue()); - if (locantArray.length==2){ - bridgeFrag.getOutAtom(0).setLocant(locantArray[0]); - bridgeFrag.getOutAtom(1).setLocant(locantArray[1]); - possibleLocant.detach(); + Element possibleMultiplier = OpsinTools.getPreviousSibling(bridge); + List locants = null; + int multiplier = 1; + if (possibleMultiplier != null) { + Element possibleLocant; + if (possibleMultiplier.getName().equals(MULTIPLIER_EL)) { + multiplier = Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR)); + possibleLocant = 
OpsinTools.getPreviousSibling(possibleMultiplier); + possibleMultiplier.detach(); + if (possibleLocant != null && possibleLocant.getName().equals(COLONORSEMICOLONDELIMITEDLOCANT_EL)) { + locants = new ArrayList(); + String[] locantsForEachMultiple = StringTools.removeDashIfPresent(possibleLocant.getValue()).split(":"); + if (locantsForEachMultiple.length != multiplier) { + throw new RuntimeException("Mismatch between locant and multiplier counts (" + locantsForEachMultiple.length + " and " + multiplier + "): " + possibleLocant.getValue()); + } + for (String locantsForInstance : locantsForEachMultiple) { + String[] locantArray = locantsForInstance.split(","); + if (locantArray.length != 2) { + throw new RuntimeException("Expected two locants per bridge, but was: " + possibleLocant.getValue()); + } + locants.add(locantArray); + } + possibleLocant.detach(); + } + } + else { + possibleLocant = possibleMultiplier; + if (possibleLocant != null && possibleLocant.getName().equals(LOCANT_EL)) { + String[] locantArray = possibleLocant.getValue().split(","); + if (locantArray.length == 2) { + locants = new ArrayList(); + locants.add(locantArray); + possibleLocant.detach(); + } + } } - StructureBuildingMethods.formEpoxide(state, bridgeFrag, ringFrag.getDefaultInAtom()); } - else{ - StructureBuildingMethods.formEpoxide(state, bridgeFrag, ringFrag.getAtomOrNextSuitableAtomOrThrow(ringFrag.getDefaultInAtom(), 1, true)); + for (int i = 0; i < multiplier; i++) { + Fragment bridgeFrag = state.fragManager.buildSMILES(bridge.getAttributeValue(VALUE_ATR), groupEl, NONE_LABELS_VAL); + List bridgeAtomList = bridgeFrag.getAtomList(); + bridgeFrag.addOutAtom(bridgeAtomList.get(0), 1, true); + bridgeFrag.addOutAtom(bridgeAtomList.get(bridgeAtomList.size() - 1), 1, true); + Atom[] ringAtoms; + if (locants != null) { + String[] locantArray = locants.get(i); + if (locantArray.length == 2) { + bridgeFrag.getOutAtom(0).setLocant(locantArray[0]); + 
bridgeFrag.getOutAtom(1).setLocant(locantArray[1]); + } + ringAtoms = StructureBuildingMethods.formEpoxide(state, bridgeFrag, ringFrag.getDefaultInAtomOrFirstAtom()); + } + else{ + List possibleAtoms = FragmentTools.findSubstituableAtoms(ringFrag, 1); + if (possibleAtoms.isEmpty()) { + throw new StructureBuildingException("Unable to find suitable atom to form bridge"); + } + if (AmbiguityChecker.isSubstitutionAmbiguous(possibleAtoms, 1)) { + state.addIsAmbiguous("Addition of bridge to: " + groupEl.getValue()); + } + ringAtoms = StructureBuildingMethods.formEpoxide(state, bridgeFrag, possibleAtoms.get(0)); + } + bridgeToRingAtoms.put(bridgeFrag, ringAtoms); + state.fragManager.incorporateFragment(bridgeFrag, ringFrag); } - state.fragManager.incorporateFragment(bridgeFrag, ringFrag); bridge.detach(); } + int highestLocant = getHighestNumericLocant(ringFrag); + List bridgeFragments = new ArrayList(bridgeToRingAtoms.keySet()); + Collections.sort(bridgeFragments, new SortBridgesByHighestLocantedBridgehead(bridgeToRingAtoms)); + for (Fragment bridgeFragment: bridgeFragments) { + List bridgeFragmentAtoms = bridgeFragment.getAtomList(); + Atom[] ringAtoms = bridgeToRingAtoms.get(bridgeFragment); + if (getLocantNumber(ringAtoms[0]) <= getLocantNumber(ringAtoms[1])){ + for (int i = bridgeFragmentAtoms.size() - 1; i >=0; i--) { + bridgeFragmentAtoms.get(i).addLocant(String.valueOf(++highestLocant)); + } + } + else{ + for (Atom atom : bridgeFragmentAtoms) { + atom.addLocant(String.valueOf(++highestLocant)); + } + } + } } + private static int getLocantNumber(Atom atom) { + String locant = atom.getFirstLocant(); + if (locant != null) { + Matcher m = MATCH_NUMERIC_LOCANT.matcher(locant); + if (m.matches()){ + return Integer.parseInt(m.group(1)); + } + } + return 0; + } + + private int getHighestNumericLocant(Fragment ringFrag) { + for (int i = 1; ; i++) { + if (ringFrag.getAtomByLocant(String.valueOf(i)) == null){ + return i - 1; + } + } + } /** * Searches for lambdaConvention 
elements and applies the valency they specify to the atom @@ -3359,14 +3834,14 @@ * @throws StructureBuildingException */ private void applyLambdaConvention(Element subOrRoot) throws StructureBuildingException { - List lambdaConventionEls = XOMTools.getChildElementsWithTagName(subOrRoot, LAMBDACONVENTION_EL); + List lambdaConventionEls = subOrRoot.getChildElements(LAMBDACONVENTION_EL); for (Element lambdaConventionEl : lambdaConventionEls) { - Fragment frag = state.xmlFragmentMap.get(subOrRoot.getFirstChildElement(GROUP_EL)); + Fragment frag = subOrRoot.getFirstChildElement(GROUP_EL).getFrag(); if (lambdaConventionEl.getAttribute(LOCANT_ATR)!=null){ frag.getAtomByLocantOrThrow(lambdaConventionEl.getAttributeValue(LOCANT_ATR)).setLambdaConventionValency(Integer.parseInt(lambdaConventionEl.getAttributeValue(LAMBDA_ATR))); } else{ - if (frag.getAtomList().size()!=1){ + if (frag.getAtomCount()!=1){ throw new StructureBuildingException("Ambiguous use of lambda convention. Fragment has more than 1 atom but no locant was specified for the lambda"); } frag.getFirstAtom().setLambdaConventionValency(Integer.parseInt(lambdaConventionEl.getAttributeValue(LAMBDA_ATR))); @@ -3388,13 +3863,13 @@ private void handleMultiRadicals(Element subOrRoot) throws ComponentGenerationException, StructureBuildingException{ Element group =subOrRoot.getFirstChildElement(GROUP_EL); String groupValue =group.getValue(); - Fragment thisFrag = state.xmlFragmentMap.get(group); - if (groupValue.equals("methylene") || groupValue.equals("oxy")|| matchChalcogenReplacement.matcher(groupValue).matches()){//resolves for example trimethylene to propan-1,3-diyl or dithio to disulfan-1,2-diyl. 
Locants may not be specified before the multiplier - Element beforeGroup =(Element) XOMTools.getPreviousSibling(group); - if (beforeGroup!=null && beforeGroup.getLocalName().equals(MULTIPLIER_ATR) && beforeGroup.getAttributeValue(TYPE_ATR).equals(BASIC_TYPE_VAL) && XOMTools.getPreviousSibling(beforeGroup)==null){ + Fragment thisFrag = group.getFrag(); + if (groupValue.equals("methylene") || groupValue.equals("methylen") || groupValue.equals("oxy")|| matchChalcogenReplacement.matcher(groupValue).matches()){//resolves for example trimethylene to propan-1,3-diyl or dithio to disulfan-1,2-diyl. Locants may not be specified before the multiplier + Element beforeGroup = OpsinTools.getPreviousSibling(group); + if (beforeGroup!=null && beforeGroup.getName().equals(MULTIPLIER_ATR) && beforeGroup.getAttributeValue(TYPE_ATR).equals(BASIC_TYPE_VAL) && OpsinTools.getPreviousSibling(beforeGroup)==null){ int multiplierVal = Integer.parseInt(beforeGroup.getAttributeValue(VALUE_ATR)); if (!unsuitableForFormingChainMultiradical(group, beforeGroup)){ - if (groupValue.equals("methylene")){ + if (groupValue.equals("methylene") || groupValue.equals("methylen")){ group.getAttribute(VALUE_ATR).setValue(StringTools.multiplyString("C", multiplierVal)); } else if (groupValue.equals("oxy")){ @@ -3413,7 +3888,7 @@ throw new ComponentGenerationException("unexpected group value"); } group.getAttribute(OUTIDS_ATR).setValue("1,"+Integer.parseInt(beforeGroup.getAttributeValue(VALUE_ATR))); - XOMTools.setTextChild(group, beforeGroup.getValue() + groupValue); + group.setValue(beforeGroup.getValue() + groupValue); beforeGroup.detach(); if (group.getAttribute(LABELS_ATR)!=null){//use numeric numbering group.getAttribute(LABELS_ATR).setValue(NUMERIC_LABELS_VAL); @@ -3422,26 +3897,25 @@ group.addAttribute(new Attribute(LABELS_ATR, NUMERIC_LABELS_VAL)); } state.fragManager.removeFragment(thisFrag); - thisFrag =resolveGroup(state, group); - state.xmlFragmentMap.put(group, thisFrag); + thisFrag = 
resolveGroup(state, group); group.removeAttribute(group.getAttribute(USABLEASJOINER_ATR)); } } } if (group.getAttribute(OUTIDS_ATR)!=null){//adds outIDs at the specified atoms - String[] radicalPositions = MATCH_COMMA.split(group.getAttributeValue(OUTIDS_ATR)); + String[] radicalPositions = group.getAttributeValue(OUTIDS_ATR).split(","); int firstIdInFrag =thisFrag.getIdOfFirstAtom(); - for (String radicalID : radicalPositions) { - thisFrag.addOutAtom(firstIdInFrag + Integer.parseInt(radicalID) - 1, 1, true); - } + for (String radicalID : radicalPositions) { + thisFrag.addOutAtom(firstIdInFrag + Integer.parseInt(radicalID) - 1, 1, true); + } } int outAtomCount = thisFrag.getOutAtomCount(); if (outAtomCount >=2){ - if (groupValue.equals("amine")){//amine is a special case as it shouldn't technically be allowed but is allowed due to it's common usage in EDTA - Element previousGroup =(Element) OpsinTools.getPreviousGroup(group); - Element nextGroup =(Element) OpsinTools.getNextGroup(group); - if (previousGroup==null || state.xmlFragmentMap.get(previousGroup).getOutAtomCount() < 2 || nextGroup==null){//must be preceded by a multi radical + if (groupValue.equals("amine") || groupValue.equals("amin")) {//amine is a special case as it shouldn't technically be allowed but is allowed due to it's common usage in EDTA + Element previousGroup = OpsinTools.getPreviousGroup(group); + Element nextGroup = OpsinTools.getNextGroup(group); + if (previousGroup==null || previousGroup.getFrag().getOutAtomCount() < 2 || nextGroup==null){//must be preceded by a multi radical throw new ComponentGenerationException("Invalid use of amine as a substituent!"); } } @@ -3459,9 +3933,9 @@ } if (outAtomCount ==2 && EPOXYLIKE_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ - Element possibleLocant =(Element) XOMTools.getPreviousSibling(group); + Element possibleLocant = OpsinTools.getPreviousSibling(group); if (possibleLocant !=null){ - String[] locantValues = 
MATCH_COMMA.split(possibleLocant.getValue()); + String[] locantValues = possibleLocant.getValue().split(","); if (locantValues.length==2){ thisFrag.getOutAtom(0).setLocant(locantValues[0]); thisFrag.getOutAtom(1).setLocant(locantValues[1]); @@ -3485,14 +3959,14 @@ * @return */ private boolean unsuitableForFormingChainMultiradical(Element group, Element multiplierBeforeGroup) { - Element previousGroup = (Element) OpsinTools.getPreviousGroup(group); + Element previousGroup = OpsinTools.getPreviousGroup(group); if (previousGroup!=null){ if (previousGroup.getAttribute(ISAMULTIRADICAL_ATR)!=null){ - if (previousGroup.getAttributeValue(ACCEPTSADDITIVEBONDS_ATR)!=null && XOMTools.getPreviousSibling(previousGroup.getParent())!=null){ + if (previousGroup.getAttributeValue(ACCEPTSADDITIVEBONDS_ATR)!=null && OpsinTools.getPreviousSibling(previousGroup.getParent())!=null){ return false; } //the initial multiplier is proceded by another multiplier e.g. bis(dithio) - if (((Element)XOMTools.getPrevious(multiplierBeforeGroup)).getLocalName().equals(MULTIPLIER_EL)){ + if (OpsinTools.getPrevious(multiplierBeforeGroup).getName().equals(MULTIPLIER_EL)){ return false; } if (previousGroup.getAttributeValue(ISAMULTIRADICAL_ATR).equals(multiplierBeforeGroup.getAttributeValue(VALUE_ATR))){ @@ -3502,15 +3976,15 @@ return false; } } - else if (XOMTools.getPreviousSibling(previousGroup, MULTIPLIER_EL)==null){ + else if (OpsinTools.getPreviousSibling(previousGroup, MULTIPLIER_EL)==null){ //This is a 99% solution to the detection of cases such as ethylidenedioxy == ethan-1,1-diyldioxy - Fragment previousGroupFrag =state.xmlFragmentMap.get(previousGroup); + Fragment previousGroupFrag = previousGroup.getFrag(); int outAtomValency =0; if (previousGroupFrag.getOutAtomCount()==1){ outAtomValency = previousGroupFrag.getOutAtom(0).getValency(); } else{ - Element suffix = (Element) XOMTools.getNextSibling(previousGroup, SUFFIX_EL); + Element suffix = OpsinTools.getNextSibling(previousGroup, SUFFIX_EL); 
if (suffix!=null && suffix.getAttributeValue(VALUE_ATR).equals("ylidene")){ outAtomValency =2; } @@ -3534,13 +4008,13 @@ * @return numberOfOutAtoms that will be added by resolveSuffixes * @throws ComponentGenerationException */ - private int calculateOutAtomsToBeAddedFromInlineSuffixes(Element group, Elements suffixes) throws ComponentGenerationException { + private int calculateOutAtomsToBeAddedFromInlineSuffixes(Element group, List suffixes) throws ComponentGenerationException { int outAtomsThatWillBeAdded = 0; - Fragment frag = state.xmlFragmentMap.get(group); + Fragment frag = group.getFrag(); String groupType = frag.getType(); String subgroupType = frag.getSubType(); String suffixTypeToUse =null; - if (suffixRules.isGroupTypeWithSpecificSuffixRules(groupType)){ + if (suffixApplier.isGroupTypeWithSpecificSuffixRules(groupType)){ suffixTypeToUse =groupType; } else{ @@ -3552,15 +4026,12 @@ for (Fragment suffix : suffixList) { outAtomsThatWillBeAdded += suffix.getOutAtomCount(); } - for(int i=0;i suffixRules = suffixApplier.getSuffixRuleTags(suffixTypeToUse, suffixValue, subgroupType); + for (SuffixRule suffixRule : suffixRules) { + if(suffixRule.getType() == SuffixRuleType.setOutAtom) { + outAtomsThatWillBeAdded += 1; } } } @@ -3578,35 +4049,35 @@ for (int i = groups.size() -1; i >=0; i--) { Element group = groups.get(i); if (group.getAttributeValue(TYPE_ATR).equals(AMINOACID_TYPE_VAL) && OpsinTools.getNextGroup(group)!=null){ - Element possibleLocant = (Element) XOMTools.getPreviousSiblingIgnoringCertainElements(group, new String[]{MULTIPLIER_EL}); - if (possibleLocant != null && possibleLocant.getLocalName().equals(LOCANT_EL)){ + Element possibleLocant = OpsinTools.getPreviousSiblingIgnoringCertainElements(group, new String[]{MULTIPLIER_EL}); + if (possibleLocant != null && possibleLocant.getName().equals(LOCANT_EL)){ continue; } - Element subOrRoot = (Element) group.getParent(); + Element subOrRoot = group.getParent(); //now find the brackets/substituents 
before this element - Element previous = (Element) XOMTools.getPreviousSibling(subOrRoot); + Element previous = OpsinTools.getPreviousSibling(subOrRoot); List previousElements = new ArrayList(); while( previous !=null){ - if (!previous.getLocalName().equals(SUBSTITUENT_EL) && !previous.getLocalName().equals(BRACKET_EL)){ + if (!previous.getName().equals(SUBSTITUENT_EL) && !previous.getName().equals(BRACKET_EL)){ break; } previousElements.add(previous); - previous = (Element) XOMTools.getPreviousSibling(previous); + previous = OpsinTools.getPreviousSibling(previous); } if (previousElements.size()>0){//an implicit bracket is needed Collections.reverse(previousElements); - Element bracket = new Element(BRACKET_EL); + Element bracket = new GroupingEl(BRACKET_EL); bracket.addAttribute(new Attribute(TYPE_ATR, IMPLICIT_TYPE_VAL)); - Element parent = (Element) subOrRoot.getParent(); + Element parent = subOrRoot.getParent(); int indexToInsertAt = parent.indexOf(previousElements.get(0)); for (Element element : previousElements) { element.detach(); - bracket.appendChild(element); + bracket.addChild(element); } subOrRoot.detach(); - bracket.appendChild(subOrRoot); + bracket.addChild(subOrRoot); parent.insertChild(bracket, indexToInsertAt); brackets.add(bracket); } @@ -3614,258 +4085,339 @@ } } - /**Looks for places where brackets should have been, and does the same - * as findAndStructureBrackets. E.g. dimethylaminobenzene -> (dimethylamino)benzene. - * The bracketting in the above case occurs when the substituent that is being procesed is the amino group + /** + * Looks for whether this substituent should be bracketed to the substituent before it + * E.g. 
dimethylaminobenzene -> (dimethylamino)benzene when the substituent is the amino + * The list of brackets is modified if the method does something + * @param substituent * @param brackets - * @param substituents: An arraylist of substituent elements - * @return Whether the method did something, and so needs to be called again. * @throws StructureBuildingException - * @throws ComponentGenerationException + * @throws ComponentGenerationException */ - private void findAndStructureImplictBrackets(List substituents, List brackets) throws ComponentGenerationException, StructureBuildingException { - for (Element substituent : substituents) {//will attempt to bracket this substituent with the substituent before it - String firstElInSubName =((Element)substituent.getChild(0)).getLocalName(); - if (firstElInSubName.equals(LOCANT_EL) ||firstElInSubName.equals(MULTIPLIER_EL)){ - continue; - } + private void implicitlyBracketToPreviousSubstituentIfAppropriate(Element substituent, List brackets) throws ComponentGenerationException, StructureBuildingException { + String firstElInSubName = substituent.getChild(0).getName(); + if (firstElInSubName.equals(LOCANT_EL) || firstElInSubName.equals(MULTIPLIER_EL)){ + return; + } - Element substituentGroup = substituent.getFirstChildElement(GROUP_EL); - //Only some substituents are valid joiners (e.g. no rings are valid joiners). Need to be atleast bivalent. - if (substituentGroup.getAttribute(USABLEASJOINER_ATR)==null){ - continue; - } + Element substituentGroup = substituent.getFirstChildElement(GROUP_EL); + //Only some substituents are valid joiners (e.g. no rings are valid joiners). Need to be atleast bivalent. 
+ if (substituentGroup.getAttribute(USABLEASJOINER_ATR) == null){ + return; + } - //checks that the element before is a substituent or a bracket which will obviously include substituent/s - //this makes sure there's more than just a substituent in the bracket - Element elementBeforeSubstituent =(Element)XOMTools.getPreviousSibling(substituent); - if (elementBeforeSubstituent ==null|| - !elementBeforeSubstituent.getLocalName().equals(SUBSTITUENT_EL) && - !elementBeforeSubstituent.getLocalName().equals(BRACKET_EL)){ - continue; - } - - Element elementAftersubstituent =(Element)XOMTools.getNextSibling(substituent); - if (elementAftersubstituent != null){ - //Not preceded and succeded by a bracket e.g. Not (benzyl)methyl(phenyl)amine c.f. P-16.4.1.3 (draft 2004) - if (elementBeforeSubstituent.getLocalName().equals(BRACKET_EL) && !IMPLICIT_TYPE_VAL.equals(elementBeforeSubstituent.getAttributeValue(TYPE_ATR)) && elementAftersubstituent.getLocalName().equals(BRACKET_EL)){ - Element firstChildElementOfElementAfterSubstituent = (Element) elementAftersubstituent.getChild(0); - if ((firstChildElementOfElementAfterSubstituent.getLocalName().equals(SUBSTITUENT_EL) || firstChildElementOfElementAfterSubstituent.getLocalName().equals(BRACKET_EL)) - && !((Element)XOMTools.getPrevious(firstChildElementOfElementAfterSubstituent)).getLocalName().equals(HYPHEN_EL)){ - continue; - } + //checks that the element before is a substituent or a bracket which will obviously include substituent/s + //this makes sure that there would be more than more than just a substituent if a bracket is added + Element elementBeforeSubstituent = OpsinTools.getPreviousSibling(substituent); + if (elementBeforeSubstituent == null || + !elementBeforeSubstituent.getName().equals(SUBSTITUENT_EL) && + !elementBeforeSubstituent.getName().equals(BRACKET_EL)) { + return; + } + + Element elementAftersubstituent = OpsinTools.getNextSibling(substituent); + if (elementAftersubstituent != null) { + //Not preceded and 
followed by a bracket e.g. Not (benzyl)methyl(phenyl)amine c.f. P-16.4.1.3 (draft 2004) + if (elementBeforeSubstituent.getName().equals(BRACKET_EL) && !IMPLICIT_TYPE_VAL.equals(elementBeforeSubstituent.getAttributeValue(TYPE_ATR)) && elementAftersubstituent.getName().equals(BRACKET_EL)) { + Element firstChildElementOfElementAfterSubstituent = elementAftersubstituent.getChild(0); + if ((firstChildElementOfElementAfterSubstituent.getName().equals(SUBSTITUENT_EL) || firstChildElementOfElementAfterSubstituent.getName().equals(BRACKET_EL)) + && !OpsinTools.getPrevious(firstChildElementOfElementAfterSubstituent).getName().equals(HYPHEN_EL)) { + return; } } - //there must be an element after the substituent (or the substituent is being used for locanted ester formation) for the implicit bracket to be required - if (elementAftersubstituent ==null || - !elementAftersubstituent.getLocalName().equals(SUBSTITUENT_EL) && - !elementAftersubstituent.getLocalName().equals(BRACKET_EL) && - !elementAftersubstituent.getLocalName().equals(ROOT_EL)){ - if (elementAftersubstituent == null && ((Element)substituent.getParent()).getLocalName().equals(WORD_EL) && ( - state.currentWordRule == WordRule.ester || state.currentWordRule == WordRule.functionalClassEster || state.currentWordRule == WordRule.multiEster || state.currentWordRule == WordRule.acetal)){ - //special case to allow bracketting for locanted esters - } - else{ - continue; - } + } + //there must be an element after the substituent (or the substituent is being used for locanted ester formation) for the implicit bracket to be required + if (!isSubBracketOrRoot(elementAftersubstituent)) { + if (!(elementAftersubstituent == null && locantedEsterImplicitBracketSpecialCase(substituent, elementBeforeSubstituent))) { + return; } + } - //look for hyphen between substituents, this seems to indicate implicit bracketing was not desired e.g. 
dimethylaminomethane vs dimethyl-aminomethane - Element elementDirectlyBeforeSubstituent = (Element) XOMTools.getPrevious(substituent.getChild(0));//can't return null as we know elementBeforeSubstituent is not null - if (elementDirectlyBeforeSubstituent.getLocalName().equals(HYPHEN_EL)){ - continue; - } - - Fragment frag =state.xmlFragmentMap.get(substituentGroup); - String theSubstituentSubType = substituentGroup.getAttributeValue(SUBTYPE_ATR); - String theSubstituentType = substituentGroup.getAttributeValue(TYPE_ATR); - - //prevents alkyl chains being bracketed together e.g. ethylmethylamine - //...unless it's something like 2-methylethyl where the first appears to be locanted onto the second - List groupElements = XOMTools.getDescendantElementsWithTagName(elementBeforeSubstituent, GROUP_EL);//one for a substituent, possibly more for a bracket - Element lastGroupOfElementBeforeSub =groupElements.get(groupElements.size()-1); - if (lastGroupOfElementBeforeSub==null){throw new ComponentGenerationException("No group where group was expected");} - if (theSubstituentType.equals(CHAIN_TYPE_VAL) && theSubstituentSubType.equals(ALKANESTEM_SUBTYPE_VAL) && - lastGroupOfElementBeforeSub.getAttributeValue(TYPE_ATR).equals(CHAIN_TYPE_VAL) && lastGroupOfElementBeforeSub.getAttributeValue(SUBTYPE_ATR).equals(ALKANESTEM_SUBTYPE_VAL)){ - boolean placeInImplicitBracket =false; - - Element suffixAfterGroup=(Element)XOMTools.getNextSibling(lastGroupOfElementBeforeSub, SUFFIX_EL); - //if the alkane ends in oxy, sulfinyl, sulfonyl etc. 
it's not a pure alkane (other suffixes don't need to be considered as they would produce silly structures) - if (suffixAfterGroup !=null && matchInlineSuffixesThatAreAlsoGroups.matcher(suffixAfterGroup.getValue()).matches()){ - placeInImplicitBracket =true; - } - //look for locants and check whether they appear to be referring to the other chain - if (!placeInImplicitBracket){ - Elements childrenOfElementBeforeSubstituent =elementBeforeSubstituent.getChildElements(); - Boolean foundLocantNotReferringToChain =null; - for (int i = 0; i < childrenOfElementBeforeSubstituent.size(); i++) { - String currentElementName = childrenOfElementBeforeSubstituent.get(i).getLocalName(); - if (currentElementName.equals(LOCANT_EL)){ - String locantText =childrenOfElementBeforeSubstituent.get(i).getValue(); - if(!frag.hasLocant(locantText)){ - foundLocantNotReferringToChain=true; - break; - } - else{ - foundLocantNotReferringToChain=false; - } - } - else if (currentElementName.equals(STEREOCHEMISTRY_EL)){ + //look for hyphen between substituents, this seems to indicate implicit bracketing was not desired e.g. dimethylaminomethane vs dimethyl-aminomethane + //an exception is made for groups like carbonyl/sulfonyl as these typically should be implicitly bracketed e.g. tert-butoxy-carbonyl + Element elementDirectlyBeforeSubstituent = OpsinTools.getPrevious(substituent.getChild(0));//can't return null as we know elementBeforeSubstituent is not null + if (elementDirectlyBeforeSubstituent.getName().equals(HYPHEN_EL) && + !matchGroupsThatAreAlsoInlineSuffixes.matcher(substituentGroup.getValue()).matches()) { + return; + } + + Fragment frag = substituentGroup.getFrag(); + String theSubstituentSubType = substituentGroup.getAttributeValue(SUBTYPE_ATR); + String theSubstituentType = substituentGroup.getAttributeValue(TYPE_ATR); + + //prevents alkyl chains being bracketed together e.g. 
ethylmethylamine + //...unless it's something like 2-methylethyl where the first appears to be locanted onto the second + List groupElements = OpsinTools.getDescendantElementsWithTagName(elementBeforeSubstituent, GROUP_EL);//one for a substituent, possibly more for a bracket + Element lastGroupOfElementBeforeSub = groupElements.get(groupElements.size() - 1); + if (lastGroupOfElementBeforeSub == null) { + throw new ComponentGenerationException("No group where group was expected"); + } + if (theSubstituentType.equals(CHAIN_TYPE_VAL) && theSubstituentSubType.equals(ALKANESTEM_SUBTYPE_VAL) && + lastGroupOfElementBeforeSub.getAttributeValue(TYPE_ATR).equals(CHAIN_TYPE_VAL) && lastGroupOfElementBeforeSub.getAttributeValue(SUBTYPE_ATR).equals(ALKANESTEM_SUBTYPE_VAL)) { + boolean placeInImplicitBracket = false; + + Element suffixAfterGroup = OpsinTools.getNextSibling(lastGroupOfElementBeforeSub, SUFFIX_EL); + //if the alkane ends in oxy, sulfinyl, sulfonyl etc. it's not a pure alkane + //the outatom check rules out things like "oyl" which don't extend the chain + if (suffixAfterGroup !=null && suffixAfterGroup.getFrag() != null && suffixAfterGroup.getFrag().getOutAtomCount() > 0) { + placeInImplicitBracket = true; + } + //look for locants and check whether they appear to be referring to the other chain + if (!placeInImplicitBracket) { + List childrenOfElementBeforeSubstituent = elementBeforeSubstituent.getChildElements(); + Boolean foundLocantNotReferringToChain = null; + for (Element childOfElBeforeSub : childrenOfElementBeforeSubstituent) { + String currentElementName = childOfElBeforeSub.getName(); + if (currentElementName.equals(LOCANT_EL)){ + String locantText = childOfElBeforeSub.getValue(); + if(!frag.hasLocant(locantText)){ + foundLocantNotReferringToChain = true; + break; } else{ - break; + foundLocantNotReferringToChain = false; } } - if (foundLocantNotReferringToChain !=null && !foundLocantNotReferringToChain){//a locant was found and it appeared to refer to the 
other chain - placeInImplicitBracket=true; + else if (currentElementName.equals(STEREOCHEMISTRY_EL)){ + } + else{ + break; } } - if (!placeInImplicitBracket){ - continue; + if (foundLocantNotReferringToChain !=null && !foundLocantNotReferringToChain){//a locant was found and it appeared to refer to the other chain + placeInImplicitBracket = true; } } + if (!placeInImplicitBracket){ + return; + } + } - //prevent bracketing to multi radicals unless through substitution they are likely to cease being multiradicals - if (lastGroupOfElementBeforeSub.getAttribute(ISAMULTIRADICAL_ATR)!=null && lastGroupOfElementBeforeSub.getAttribute(ACCEPTSADDITIVEBONDS_ATR)==null && lastGroupOfElementBeforeSub.getAttribute(IMINOLIKE_ATR)==null){ - continue; + //prevent bracketing to multi radicals unless through substitution they are likely to cease being multiradicals + if (lastGroupOfElementBeforeSub.getAttribute(ISAMULTIRADICAL_ATR) != null && lastGroupOfElementBeforeSub.getAttribute(ACCEPTSADDITIVEBONDS_ATR) == null && lastGroupOfElementBeforeSub.getAttribute(IMINOLIKE_ATR) == null) { + return; + } + if (substituentGroup.getAttribute(ISAMULTIRADICAL_ATR) != null) { + if (substituentGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) == null && substituentGroup.getAttribute(IMINOLIKE_ATR) == null) { + //after implicit bracketting the substituent should no longer be a multi-radical. If neither of the above attributes apply this can't happen + return; } - if (substituentGroup.getAttribute(ISAMULTIRADICAL_ATR)!=null && substituentGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR)==null && substituentGroup.getAttribute(IMINOLIKE_ATR)==null){ - continue; + //being not substitutable doesn't mean it can't form additive bonds cf. oxy. 
Additive bonds can still benefit from implicit bracketing + boolean isSubstitutable = false; + for (Atom atom : frag.getAtomList()) { + if (StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(atom) > 0){ + isSubstitutable = true; + break; + } } - if (lastGroupOfElementBeforeSub.getAttribute(IMINOLIKE_ATR)!=null && substituentGroup.getAttribute(IMINOLIKE_ATR)!=null){ - continue;//possibly a multiplicative additive operation + if (!isSubstitutable && elementAftersubstituent != null && elementAftersubstituent.getChild(0).getName().equals(MULTIPLIER_EL)) { + //return if multiplicative nomenclature detected, if the multiplier differs from the out atom count, additive bonds may still be possible + if (frag.getOutAtomCount() == Integer.parseInt(elementAftersubstituent.getChild(0).getAttributeValue(VALUE_ATR))){ + String elType = elementAftersubstituent.getName(); + if (elType.equals(ROOT_EL)) { + return; + } + else if (elType.equals(SUBSTITUENT_EL)) { + List groups = OpsinTools.getDescendantElementsWithTagName(elementAftersubstituent, GROUP_EL); + for (Element group : groups) { + if (group.getAttribute(ISAMULTIRADICAL_ATR) != null){ + return ;//a multi radical + } + } + } + else if (elType.equals(BRACKET_EL) && OpsinTools.getDescendantElementsWithTagName(elementAftersubstituent, ROOT_EL).size() > 0) { + return; + } + } } - - //prevent bracketting perhalogeno terms - if (PERHALOGENO_SUBTYPE_VAL.equals(lastGroupOfElementBeforeSub.getAttributeValue(SUBTYPE_ATR))){ - continue; + } + if (lastGroupOfElementBeforeSub.getAttribute(IMINOLIKE_ATR) != null && substituentGroup.getAttribute(IMINOLIKE_ATR) != null){ + return;//possibly a multiplicative additive operation + } + + if (implicitBracketWouldPreventAdditiveBonding(elementBeforeSubstituent, elementAftersubstituent)) { + return;//e.g. 
N-ethylmethylsulfonimidoyl + } + + if (substituentGroup.getValue().equals("sulf") && frag.getAtomCount() == 1) { + Element suffix = OpsinTools.getNextSiblingIgnoringCertainElements(substituentGroup, new String[]{UNSATURATOR_EL}); + if (suffix != null && suffix.getAttributeValue(VALUE_ATR).equals("ylidene")) { + substituentGroup.removeAttribute(substituentGroup.getAttribute(USABLEASJOINER_ATR)); + //TODO resolve suffixes as early as can be done unambiguously + //e.g. it should be possible to know that sulfanylidene has 0 hydrogen but azaniumylidyne has 1 + return; } + } + + //prevent bracketing perhalogeno terms + if (PERHALOGENO_SUBTYPE_VAL.equals(lastGroupOfElementBeforeSub.getAttributeValue(SUBTYPE_ATR))) { + return; + } - /* - * locant may need to be moved. This occurs when the group in elementBeforeSubstituent is not supposed to be locanted onto - * theSubstituentGroup - * e.g. 2-aminomethyl-1-chlorobenzene where the 2 refers to the benzene NOT the methyl - */ - List locantRelatedElements = new ArrayList();//sometimes moved - String[] locantValues = null; - ArrayList stereoChemistryElements =new ArrayList();//always moved if bracketing occurs - Elements childrenOfElementBeforeSubstituent = elementBeforeSubstituent.getChildElements(); - for (int i = 0; i < childrenOfElementBeforeSubstituent.size(); i++) { - String currentElementName = childrenOfElementBeforeSubstituent.get(i).getLocalName(); - if (currentElementName.equals(STEREOCHEMISTRY_EL)){ - stereoChemistryElements.add(childrenOfElementBeforeSubstituent.get(i)); - } - else if (currentElementName.equals(LOCANT_EL)){ - if (locantValues !=null){ - break; - } - locantRelatedElements.add(childrenOfElementBeforeSubstituent.get(i)); - locantValues = MATCH_COMMA.split(childrenOfElementBeforeSubstituent.get(i).getValue()); - } - else{ + /* + * locant may need to be moved. This occurs when the group in elementBeforeSubstituent is not supposed to be locanted onto + * theSubstituentGroup + * e.g. 
2-aminomethyl-1-chlorobenzene where the 2 refers to the benzene NOT the methyl + */ + List locantRelatedElements = new ArrayList();//sometimes moved + String[] locantValues = null; + List stereoChemistryElements = new ArrayList();//always moved if bracketing occurs + List childrenOfElementBeforeSubstituent = elementBeforeSubstituent.getChildElements(); + for (Element childOfElBeforeSub : childrenOfElementBeforeSubstituent) { + String currentElementName = childOfElBeforeSub.getName(); + if (currentElementName.equals(STEREOCHEMISTRY_EL)){ + stereoChemistryElements.add(childOfElBeforeSub); + } + else if (currentElementName.equals(LOCANT_EL)) { + if (locantValues != null) { break; } + locantRelatedElements.add(childOfElBeforeSub); + locantValues = childOfElBeforeSub.getValue().split(","); } + else{ + break; + } + } - //either all locants will be moved, or none - boolean moveLocants = false; - if (locantValues!=null){ - Element elAfterLocant = (Element) XOMTools.getNextSibling(locantRelatedElements.get(0)); - for (String locantText : locantValues) { - if (elAfterLocant !=null && elAfterLocant.getAttribute(FRONTLOCANTSEXPECTED_ATR)!=null && StringTools.arrayToList(MATCH_COMMA.split(elAfterLocant.getAttributeValue(FRONTLOCANTSEXPECTED_ATR))).contains(locantText)){ - continue; + //either all locants will be moved, or none + boolean moveLocants = false; + if (locantValues != null) { + Element elAfterLocant = OpsinTools.getNextSibling(locantRelatedElements.get(0)); + for (String locantText : locantValues) { + //Check the right fragment in the bracket: + //if it only has 1 then assume locanted substitution onto it not intended. 
Or if doesn't have the required locant + if (frag.getAtomCount() == 1 || !frag.hasLocant(locantText) || matchElementSymbolOrAminoAcidLocant.matcher(locantText).find() + || (locantValues.length == 1 && elAfterLocant.getName().equals(MULTIPLIER_EL))) { + if (checkLocantPresentOnPotentialRoot(state, substituent, locantText)){ + moveLocants = true;//locant location is present elsewhere + break; } - - //Check the right fragment in the bracket: - //if it only has 1 then assume locanted substitution onto it not intended. Or if doesn't have the required locant - if (frag.getAtomList().size()==1 || !frag.hasLocant(locantText) || matchElementSymbolOrAminoAcidLocant.matcher(locantText).find() - || (locantValues.length ==1 && elAfterLocant.getLocalName().equals(MULTIPLIER_EL))){ - if (checkLocantPresentOnPotentialRoot(substituent, locantText)){ - moveLocants =true;//locant location is present elsewhere - break; + else { + if( frag.getAtomCount() == 1 && frag.hasLocant(locantText)) { + //1 locant was intended to locant onto fragment with 1 atom } - else if (findElementsMissingIndirectLocants(elementBeforeSubstituent, locantRelatedElements.get(0)).size()==0 || !state.xmlFragmentMap.get(lastGroupOfElementBeforeSub).hasLocant(locantText)){ - if( frag.getAtomList().size()==1 && frag.hasLocant(locantText)){ - //1 locant was intended to locant onto fragment with 1 atom - } - else{ - moveLocants =true;//the fragment adjacent to the locant doesn't have this locant or doesn't need any indirect locants. Assume it will appear elsewhere later - break; - } + else{ + moveLocants = true;//the fragment adjacent to the locant doesn't have this locant or doesn't need any indirect locants. 
Assume it will appear elsewhere later + break; } } } - + } - if (moveLocants && locantValues.length >1){ - if (elAfterLocant !=null && elAfterLocant.getLocalName().equals(MULTIPLIER_EL)){ - Element shouldBeAGroupOrSubOrBracket = (Element)XOMTools.getNextSiblingIgnoringCertainElements(elAfterLocant, new String[]{MULTIPLIER_EL}); - if (shouldBeAGroupOrSubOrBracket !=null){ - if ((shouldBeAGroupOrSubOrBracket.getLocalName().equals(GROUP_EL) && elAfterLocant.getAttributeValue(TYPE_ATR).equals(GROUP_TYPE_VAL))//e.g. 2,5-bisaminothiobenzene --> 2,5-bis(aminothio)benzene - || (frag.getAtomList().size()==1)//e.g. 1,3,4-trimethylthiobenzene - || (matchInlineSuffixesThatAreAlsoGroups.matcher(substituentGroup.getValue()).matches())){//e.g. 4,4'-dimethoxycarbonyl-2,2'-bioxazole --> 4,4'-di(methoxycarbonyl)-2,2'-bioxazole - locantRelatedElements.add(elAfterLocant);//e.g. 1,5-bis-(4-methylphenyl)sulfonyl --> 1,5-bis-((4-methylphenyl)sulfonyl) - } - else if (ORTHOMETAPARA_TYPE_VAL.equals(locantRelatedElements.get(0).getAttributeValue(TYPE_ATR))){//e.g. p-dimethylamino[ring] - XOMTools.setTextChild(locantRelatedElements.get(0), locantValues[1]); - } - else{//don't bracket other complex multiplied substituents (name hasn't given enough hints if indeed bracketing was expected) - continue; - } + if (moveLocants && locantValues.length > 1) { + if (elAfterLocant != null && elAfterLocant.getName().equals(MULTIPLIER_EL)) { + Element shouldBeAGroupOrSubOrBracket = OpsinTools.getNextSiblingIgnoringCertainElements(elAfterLocant, new String[]{MULTIPLIER_EL}); + if (shouldBeAGroupOrSubOrBracket != null) { + if ((shouldBeAGroupOrSubOrBracket.getName().equals(GROUP_EL) && elAfterLocant.getAttributeValue(TYPE_ATR).equals(GROUP_TYPE_VAL))//e.g. 2,5-bisaminothiobenzene --> 2,5-bis(aminothio)benzene + || (matchGroupsThatAreAlsoInlineSuffixes.matcher(substituentGroup.getValue()).matches())){//e.g. 
4,4'-dimethoxycarbonyl-2,2'-bioxazole --> 4,4'-di(methoxycarbonyl)-2,2'-bioxazole + locantRelatedElements.add(elAfterLocant);//e.g. 1,5-bis-(4-methylphenyl)sulfonyl --> 1,5-bis-((4-methylphenyl)sulfonyl) } - else{ - moveLocants =false; + else if (ORTHOMETAPARA_TYPE_VAL.equals(locantRelatedElements.get(0).getAttributeValue(TYPE_ATR))) {//e.g. p-dimethylamino[ring] + locantRelatedElements.get(0).setValue(locantValues[1]); + } + else if (frag.getAtomCount() == 1) {//e.g. 1,3,4-trimethylthiobenzene --> 1,3,4-tri(methylthio)benzene + locantRelatedElements.add(elAfterLocant); + } + else{//don't bracket other complex multiplied substituents (name hasn't given enough hints if indeed bracketing was expected) + return; } } else{ - moveLocants =false; + moveLocants = false; } } + else{ + moveLocants = false; + } } + } - Element bracket = new Element(BRACKET_EL); - bracket.addAttribute(new Attribute(TYPE_ATR, IMPLICIT_TYPE_VAL)); + Element bracket = new GroupingEl(BRACKET_EL); + bracket.addAttribute(new Attribute(TYPE_ATR, IMPLICIT_TYPE_VAL)); - for (Element stereoChemistryElement : stereoChemistryElements) { - stereoChemistryElement.detach(); - bracket.appendChild(stereoChemistryElement); - } - if (moveLocants){ - for (Element locantElement : locantRelatedElements) { - locantElement.detach(); - bracket.appendChild(locantElement); - } + for (Element stereoChemistryElement : stereoChemistryElements) { + stereoChemistryElement.detach(); + bracket.addChild(stereoChemistryElement); + } + if (moveLocants){ + for (Element locantElement : locantRelatedElements) { + locantElement.detach(); + bracket.addChild(locantElement); } + } - /* - * Case when a multiplier should be moved - * e.g. 
tripropan-2-yloxyphosphane -->tri(propan-2-yloxy)phosphane or trispropan-2-ylaminophosphane --> tris(propan-2-ylamino)phosphane - */ - if (locantRelatedElements.size()==0){ - Element possibleMultiplier =childrenOfElementBeforeSubstituent.get(0); - if (possibleMultiplier.getLocalName().equals(MULTIPLIER_EL) && ( - matchInlineSuffixesThatAreAlsoGroups.matcher(substituentGroup.getValue()).matches() || possibleMultiplier.getAttributeValue(TYPE_ATR).equals(GROUP_TYPE_VAL))){ - Element desiredGroup = XOMTools.getNextSiblingIgnoringCertainElements(possibleMultiplier, new String[]{MULTIPLIER_EL}); - if (desiredGroup !=null && desiredGroup.getLocalName().equals(GROUP_EL)){ - childrenOfElementBeforeSubstituent.get(0).detach(); - bracket.appendChild(childrenOfElementBeforeSubstituent.get(0)); - } + /* + * Case when a multiplier should be moved + * e.g. tripropan-2-yloxyphosphane -->tri(propan-2-yloxy)phosphane or trispropan-2-ylaminophosphane --> tris(propan-2-ylamino)phosphane + */ + if (locantRelatedElements.size() == 0) { + Element possibleMultiplier = childrenOfElementBeforeSubstituent.get(0); + if (possibleMultiplier.getName().equals(MULTIPLIER_EL) && ( + matchGroupsThatAreAlsoInlineSuffixes.matcher(substituentGroup.getValue()).matches() || possibleMultiplier.getAttributeValue(TYPE_ATR).equals(GROUP_TYPE_VAL))){ + Element desiredGroup = OpsinTools.getNextSiblingIgnoringCertainElements(possibleMultiplier, new String[]{MULTIPLIER_EL}); + if (desiredGroup !=null && desiredGroup.getName().equals(GROUP_EL)) { + possibleMultiplier.detach(); + bracket.addChild(possibleMultiplier); } } + } - Element parent = (Element)substituent.getParent(); - int startIndex=parent.indexOf(elementBeforeSubstituent); - int endIndex=parent.indexOf(substituent); - for(int i = 0 ; i <= (endIndex-startIndex);i++) { - Node n = parent.getChild(startIndex); - n.detach(); - bracket.appendChild(n); + Element parent = substituent.getParent(); + int startIndex = parent.indexOf(elementBeforeSubstituent); + 
int endIndex = parent.indexOf(substituent); + for(int i = 0 ; i <= (endIndex - startIndex); i++) { + Element n = parent.getChild(startIndex); + n.detach(); + bracket.addChild(n); + } + parent.insertChild(bracket, startIndex); + brackets.add(bracket); + } + + private boolean implicitBracketWouldPreventAdditiveBonding(Element elementBeforeSubstituent, Element elementAftersubstituent) { + if (elementAftersubstituent != null && elementAftersubstituent.getName().equals(SUBSTITUENT_EL)) { + Element groupAfterSubstituent = elementAftersubstituent.getFirstChildElement(GROUP_EL); + if (groupAfterSubstituent.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && + !isSubBracketOrRoot(OpsinTools.getNextSibling(elementAftersubstituent))) { + if (elementBeforeSubstituent.getChild(0).getName().equals(LOCANT_EL)) { + Fragment additiveAcceptingFrag = groupAfterSubstituent.getFrag(); + Element viableSubstituent = elementBeforeSubstituent; + while (viableSubstituent != null) { + if (viableSubstituent.getName().equals(SUBSTITUENT_EL) || viableSubstituent.getName().equals(BRACKET_EL)) { + Element possibleLocant = viableSubstituent.getChild(0); + if (possibleLocant.getName().equals(LOCANT_EL)){ + if (additiveAcceptingFrag.getFirstAtom().equals(additiveAcceptingFrag.getAtomByLocant(possibleLocant.getValue()))) { + return false; + } + } + } + viableSubstituent = OpsinTools.getPreviousSibling(viableSubstituent); + } + return true; + } } - parent.insertChild(bracket, startIndex); - brackets.add(bracket); } + return false; } + /** + * Retrusn true in the case that: + * the given substituent is a direct child of a word element + * The preceding substituent/bracket is the first element in the word element + * The current word rule invovles locanted ester like linkages + * @param substituent + * @param elementBeforeSubstituent + * @return + */ + private boolean locantedEsterImplicitBracketSpecialCase(Element substituent, Element elementBeforeSubstituent) { + if 
(substituent.getParent().getName().equals(WORD_EL) && + OpsinTools.getPreviousSibling(elementBeforeSubstituent) == null && + (state.currentWordRule == WordRule.ester || state.currentWordRule == WordRule.functionalClassEster || state.currentWordRule == WordRule.multiEster || state.currentWordRule == WordRule.acetal)){ + return true; + } + return false; + } /** * Attempts to match locants to non adjacent suffixes/unsatuators @@ -3882,13 +4434,13 @@ if (locantEls.size()>0){ Element group =subOrRoot.getFirstChildElement(GROUP_EL); Element lastLocant = locantEls.get(locantEls.size()-1);//the locant that may apply to an unsaturator/suffix - String[] locantValues = MATCH_COMMA.split(lastLocant.getValue()); + String[] locantValues = lastLocant.getValue().split(","); if (locantValues.length==1 && group.getAttribute(FRONTLOCANTSEXPECTED_ATR)!=null){//some trivial retained names like 2-furyl expect locants to be in front of them. For these the indirect intepretation will always be used rather than checking whether 2-(furyl) even makes sense - String[] allowedLocants = MATCH_COMMA.split(group.getAttributeValue(FRONTLOCANTSEXPECTED_ATR)); + String[] allowedLocants = group.getAttributeValue(FRONTLOCANTSEXPECTED_ATR).split(","); for (String allowedLocant : allowedLocants) { if (locantValues[0].equals(allowedLocant)){ - Element expectedSuffix =(Element) XOMTools.getNextSibling(group); - if (expectedSuffix!=null && expectedSuffix.getLocalName().equals(SUFFIX_EL) && expectedSuffix.getAttribute(LOCANT_ATR)==null){ + Element expectedSuffix = OpsinTools.getNextSibling(group); + if (expectedSuffix!=null && expectedSuffix.getName().equals(SUFFIX_EL) && expectedSuffix.getAttribute(LOCANT_ATR)==null){ expectedSuffix.addAttribute(new Attribute(LOCANT_ATR, locantValues[0])); lastLocant.detach(); return; @@ -3899,14 +4451,14 @@ } boolean allowIndirectLocants =true; if(state.currentWordRule == WordRule.multiEster && 
!ADDEDHYDROGENLOCANT_TYPE_VAL.equals(lastLocant.getAttributeValue(TYPE_ATR))){//special case e.g. 1-benzyl 4-butyl terephthalate (locants do not apply to yls) - Element parentEl=(Element) subOrRoot.getParent(); - if (parentEl.getLocalName().equals(WORD_EL) && parentEl.getAttributeValue(TYPE_ATR).equals(SUBSTITUENT_EL) && parentEl.getChildCount()==1 && + Element parentEl = subOrRoot.getParent(); + if (parentEl.getName().equals(WORD_EL) && parentEl.getAttributeValue(TYPE_ATR).equals(SUBSTITUENT_EL) && parentEl.getChildCount()==1 && locantValues.length==1 && !ORTHOMETAPARA_TYPE_VAL.equals(lastLocant.getAttributeValue(TYPE_ATR))){ allowIndirectLocants =false; } } - Fragment fragmentAfterLocant =state.xmlFragmentMap.get(group); - if (fragmentAfterLocant.getAtomList().size()<=1){ + Fragment fragmentAfterLocant = group.getFrag(); + if (fragmentAfterLocant.getAtomCount()<=1){ allowIndirectLocants =false;//e.g. prevent 1-methyl as meth-1-yl is extremely unlikely to be the intended result } @@ -3915,7 +4467,7 @@ * If the locant cannot be found on a potential root this cannot be the case though (assuming the name is valid of course) */ if (!ADDEDHYDROGENLOCANT_TYPE_VAL.equals(lastLocant.getAttributeValue(TYPE_ATR)) && locantEls.size() ==1 && group.getAttribute(ISAMULTIRADICAL_ATR)==null && - locantValues.length == 1 && checkLocantPresentOnPotentialRoot(subOrRoot, locantValues[0]) && XOMTools.getPreviousSibling(lastLocant, LOCANT_EL)==null){ + locantValues.length == 1 && checkLocantPresentOnPotentialRoot(state, subOrRoot, locantValues[0]) && OpsinTools.getPreviousSibling(lastLocant, LOCANT_EL)==null){ return; } boolean assignableToIndirectFeatures =true; @@ -3979,18 +4531,17 @@ * @return */ private List findLocantsThatCouldBeIndirectLocants(Element subOrRoot) { - Elements children = subOrRoot.getChildElements(); + List children = subOrRoot.getChildElements(); List locantEls = new ArrayList(); - for (int i = 0; i < children.size(); i++) { - Element el = children.get(i); - if 
(el.getLocalName().equals(LOCANT_EL)){ - Element afterLocant =(Element) XOMTools.getNextSibling(el); - if (afterLocant!=null && afterLocant.getLocalName().equals(MULTIPLIER_EL)){//locant should not be followed by a multiplier. c.f. 1,2,3-tributyl 2-acetyloxypropane-1,2,3-tricarboxylate + for (Element el : children) { + if (el.getName().equals(LOCANT_EL)){ + Element afterLocant = OpsinTools.getNextSibling(el); + if (afterLocant!=null && afterLocant.getName().equals(MULTIPLIER_EL)){//locant should not be followed by a multiplier. c.f. 1,2,3-tributyl 2-acetyloxypropane-1,2,3-tricarboxylate continue; } locantEls.add(el); } - else if (el.getLocalName().equals(GROUP_EL)){ + else if (el.getName().equals(GROUP_EL)){ break; } } @@ -4007,15 +4558,14 @@ */ private List findElementsMissingIndirectLocants(Element subOrRoot,Element locantEl) { List locantAble = new ArrayList(); - Elements childrenOfSubOrBracketOrRoot=subOrRoot.getChildElements(); - for (int j = 0; j < childrenOfSubOrBracketOrRoot.size(); j++) { - Element el =childrenOfSubOrBracketOrRoot.get(j); - String name =el.getLocalName(); + List childrenOfSubOrBracketOrRoot=subOrRoot.getChildElements(); + for (Element el : childrenOfSubOrBracketOrRoot) { + String name =el.getName(); if (name.equals(SUFFIX_EL) || name.equals(UNSATURATOR_EL) || name.equals(CONJUNCTIVESUFFIXGROUP_EL)){ if (el.getAttribute(LOCANT_ATR) ==null && el.getAttribute(LOCANTID_ATR) ==null && el.getAttribute(MULTIPLIED_ATR)==null){// shouldn't already have a locant or be multiplied (should of already had locants assignd to it if that were the case) if (subOrRoot.indexOf(el)>subOrRoot.indexOf(locantEl)){ if (name.equals(SUFFIX_EL)){//check a few special cases that must not be locanted - Element group = (Element) XOMTools.getPreviousSibling(el, GROUP_EL); + Element group = OpsinTools.getPreviousSibling(el, GROUP_EL); String type = group.getAttributeValue(TYPE_ATR); if ((type.equals(ACIDSTEM_TYPE_VAL) && 
!CYCLEFORMER_SUBTYPE_VAL.equals(el.getAttributeValue(SUBTYPE_ATR)))|| type.equals(NONCARBOXYLICACID_TYPE_VAL) || type.equals(CHALCOGENACIDSTEM_TYPE_VAL)){ @@ -4039,12 +4589,12 @@ private void assignImplicitLocantsToDiTerminalSuffixes(Element subOrRoot) throws StructureBuildingException { Element terminalSuffix1 = subOrRoot.getFirstChildElement(SUFFIX_EL); if (terminalSuffix1!=null){ - if (isATerminalSuffix(terminalSuffix1) && XOMTools.getNextSibling(terminalSuffix1) != null){ - Element terminalSuffix2 =(Element)XOMTools.getNextSibling(terminalSuffix1); + if (isATerminalSuffix(terminalSuffix1) && OpsinTools.getNextSibling(terminalSuffix1) != null){ + Element terminalSuffix2 =OpsinTools.getNextSibling(terminalSuffix1); if (isATerminalSuffix(terminalSuffix2)){ - Element hopefullyAChain = (Element) XOMTools.getPreviousSibling((Element)terminalSuffix1, GROUP_EL); + Element hopefullyAChain = OpsinTools.getPreviousSibling(terminalSuffix1, GROUP_EL); if (hopefullyAChain != null && hopefullyAChain.getAttributeValue(TYPE_ATR).equals(CHAIN_TYPE_VAL)){ - int chainLength = state.xmlFragmentMap.get(hopefullyAChain).getChainLength(); + int chainLength = hopefullyAChain.getFrag().getChainLength(); if (chainLength >=2){ terminalSuffix1.addAttribute(new Attribute(LOCANT_ATR, "1")); terminalSuffix2.addAttribute(new Attribute(LOCANT_ATR, Integer.toString(chainLength))); @@ -4063,401 +4613,45 @@ * @return */ private boolean isATerminalSuffix(Element suffix){ - return suffix.getLocalName().equals(SUFFIX_EL) && - suffix.getAttribute(LOCANT_ATR) == null && - (suffix.getAttributeValue(TYPE_ATR).equals(INLINE_TYPE_VAL) || TERMINAL_SUBTYPE_VAL.equals(suffix.getAttributeValue(SUBTYPE_ATR))); + return suffix.getName().equals(SUFFIX_EL) && + suffix.getAttribute(LOCANT_ATR) == null && + (suffix.getAttributeValue(TYPE_ATR).equals(INLINE_TYPE_VAL) || TERMINAL_SUBTYPE_VAL.equals(suffix.getAttributeValue(SUBTYPE_ATR))); } private void processConjunctiveNomenclature(Element subOrRoot) throws 
ComponentGenerationException, StructureBuildingException { - List conjunctiveGroups = XOMTools.getChildElementsWithTagName(subOrRoot, CONJUNCTIVESUFFIXGROUP_EL); - if (conjunctiveGroups.size()>0){ + List conjunctiveGroups = subOrRoot.getChildElements(CONJUNCTIVESUFFIXGROUP_EL); + int conjunctiveGroupCount = conjunctiveGroups.size(); + if (conjunctiveGroupCount > 0){ Element ringGroup = subOrRoot.getFirstChildElement(GROUP_EL); - Fragment ringFrag = state.xmlFragmentMap.get(ringGroup); + Fragment ringFrag = ringGroup.getFrag(); if (ringFrag.getOutAtomCount()!=0 ){ throw new ComponentGenerationException("OPSIN Bug: Ring fragment should have no radicals"); } - List conjunctiveFragments = new ArrayList(); - for (Element group : conjunctiveGroups) { - Fragment frag = state.xmlFragmentMap.get(group); - conjunctiveFragments.add(frag); - } - for (int i = 0; i < conjunctiveFragments.size(); i++) { - Fragment conjunctiveFragment = conjunctiveFragments.get(i); - if (conjunctiveGroups.get(i).getAttribute(LOCANT_ATR)!=null){ - state.fragManager.createBond(lastNonSuffixCarbonWithSufficientValency(conjunctiveFragment), ringFrag.getAtomByLocantOrThrow(conjunctiveGroups.get(i).getAttributeValue(LOCANT_ATR)) , 1); + for (int i = 0; i < conjunctiveGroupCount; i++) { + Element conjunctiveGroup = conjunctiveGroups.get(i); + Fragment conjunctiveFragment = conjunctiveGroup.getFrag(); + String locant = conjunctiveGroup.getAttributeValue(LOCANT_ATR); + Atom atomToConnectToOnConjunctiveFrag = FragmentTools.lastNonSuffixCarbonWithSufficientValency(conjunctiveFragment); + if (atomToConnectToOnConjunctiveFrag == null) { + throw new ComponentGenerationException("OPSIN Bug: Unable to find non suffix carbon with sufficient valency"); + } + if (locant != null){ + state.fragManager.createBond(atomToConnectToOnConjunctiveFrag, ringFrag.getAtomByLocantOrThrow(locant) , 1); } else{ - state.fragManager.createBond(lastNonSuffixCarbonWithSufficientValency(conjunctiveFragment), 
ringFrag.getAtomOrNextSuitableAtomOrThrow(ringFrag.getFirstAtom(), 1, true) , 1); + List possibleAtoms = FragmentTools.findSubstituableAtoms(ringFrag, 1); + if (possibleAtoms.isEmpty()){ + throw new StructureBuildingException("No suitable atom found for conjunctive operation"); + } + if (AmbiguityChecker.isSubstitutionAmbiguous(possibleAtoms, 1)) { + state.addIsAmbiguous("Connection of conjunctive group to: " + ringGroup.getValue()); + } + state.fragManager.createBond(atomToConnectToOnConjunctiveFrag, possibleAtoms.get(0) , 1); } state.fragManager.incorporateFragment(conjunctiveFragment, ringFrag); } } } - - - private Atom lastNonSuffixCarbonWithSufficientValency(Fragment conjunctiveFragment) throws ComponentGenerationException { - List atomList = conjunctiveFragment.getAtomList(); - for (int i = atomList.size()-1; i >=0; i--) { - Atom a = atomList.get(i); - if (a.getType().equals(SUFFIX_TYPE_VAL)){ - continue; - } - if (!a.getElement().equals("C")){ - continue; - } - if (ValencyChecker.checkValencyAvailableForBond(a, 1)){ - return a; - } - } - throw new ComponentGenerationException("OPSIN Bug: Unable to find non suffix carbon with sufficient valency"); - } - - - /**Process the effects of suffixes upon a fragment. - * Unlocanted non-terminal suffixes are not attached yet. All other suffix effects are performed - * @param group The group element for the fragment to which the suffixes will be added - * @param suffixes The suffix elements for a fragment. - * @throws StructureBuildingException If the suffixes can't be resolved properly. 
- * @throws ComponentGenerationException - */ - private void resolveSuffixes(Element group, List suffixes) throws StructureBuildingException, ComponentGenerationException { - Fragment frag = state.xmlFragmentMap.get(group); - int firstAtomID = frag.getIdOfFirstAtom();//typically equivalent to locant 1 - List atomList =frag.getAtomList();//this instance of atomList will not change even once suffixes are merged into the fragment - int defaultAtom =0;//indice in atomList - String groupType = frag.getType(); - String subgroupType = frag.getSubType(); - String suffixTypeToUse =null; - if (suffixRules.isGroupTypeWithSpecificSuffixRules(groupType)){ - suffixTypeToUse =groupType; - } - else{ - suffixTypeToUse =STANDARDGROUP_TYPE_VAL; - } - - List suffixList = state.xmlSuffixMap.get(group); - for (Element suffix : suffixes) { - String suffixValue = suffix.getAttributeValue(VALUE_ATR); - - String locant = suffix.getAttributeValue(LOCANT_ATR); - String locantId = suffix.getAttributeValue(LOCANTID_ATR); - int idOnParentFragToUse = 0; - if (locant != null && locant.indexOf(',') == -1) { - idOnParentFragToUse = frag.getIDFromLocantOrThrow(locant); - } - else if (locantId != null && locantId.indexOf(',') == -1) { - idOnParentFragToUse = Integer.parseInt(locantId); - } - else if (suffix.getAttribute(DEFAULTLOCANTID_ATR) != null) { - idOnParentFragToUse = Integer.parseInt(suffix.getAttributeValue(DEFAULTLOCANTID_ATR)); - } - else if (suffixTypeToUse.equals(ACIDSTEM_TYPE_VAL) || suffixTypeToUse.equals(NONCARBOXYLICACID_TYPE_VAL) || suffixTypeToUse.equals(CHALCOGENACIDSTEM_TYPE_VAL)) {//means that e.g. 
sulfonyl has an explicit outAtom - idOnParentFragToUse = firstAtomID; - } - - Fragment suffixFrag = null; - Elements suffixRuleTags = suffixRules.getSuffixRuleTags(suffixTypeToUse, suffixValue, subgroupType); - for (int j = 0; j < suffixRuleTags.size(); j++) { - Element suffixRuleTag = suffixRuleTags.get(j); - String suffixRuleTagName = suffixRuleTag.getLocalName(); - if (defaultAtom >= atomList.size()) { - defaultAtom = 0; - } - if (suffixRuleTagName.equals(SUFFIXRULES_ADDGROUP_EL)) { - if (suffixFrag == null) { - if (suffixList.size() <= 0) { - throw new ComponentGenerationException("OPSIN Bug: Suffixlist should not be empty"); - } - suffixFrag = suffixList.remove(0);//take the first suffix out of the list, it should of been added in the same order that it is now being read. - Atom firstAtomInSuffix = suffixFrag.getFirstAtom(); - if (firstAtomInSuffix.getBonds().size() <= 0) { - throw new ComponentGenerationException("OPSIN Bug: Dummy atom in suffix should have at least one bond to it"); - } - if (CYCLEFORMER_SUBTYPE_VAL.equals(suffix.getAttributeValue(SUBTYPE_ATR))){ - processCycleFormingSuffix(suffixFrag, frag, suffix); - } - else{ - int bondOrderRequired = firstAtomInSuffix.getIncomingValency(); - Atom parentfragAtom; - if (idOnParentFragToUse == 0) { - if (suffixRuleTag.getAttribute(SUFFIXRULES_KETONELOCANT_ATR) != null && !atomList.get(defaultAtom).getAtomIsInACycle()) { - if (defaultAtom == 0) - defaultAtom = FragmentTools.findKetoneAtomIndice(frag, defaultAtom); - idOnParentFragToUse = atomList.get(defaultAtom).getID(); - defaultAtom++; - } else { - idOnParentFragToUse = atomList.get(defaultAtom).getID(); - } - idOnParentFragToUse = frag.getAtomOrNextSuitableAtomOrThrow(frag.getAtomByIDOrThrow(idOnParentFragToUse), bondOrderRequired, true).getID(); - parentfragAtom = frag.getAtomByIDOrThrow(idOnParentFragToUse); - if (FragmentTools.isCharacteristicAtom(parentfragAtom)){ - throw new StructureBuildingException("No suitable atom found to attach suffix"); - } 
- } - else{ - parentfragAtom = frag.getAtomByIDOrThrow(idOnParentFragToUse); - } - - //create a new bond and associate it with the suffixfrag and both atoms. Remember the suffixFrag has not been imported into the frag yet - List bonds = new ArrayList(firstAtomInSuffix.getBonds()); - for (Bond bondToSuffix : bonds) { - Atom suffixAtom = bondToSuffix.getOtherAtom(firstAtomInSuffix); - state.fragManager.createBond(parentfragAtom, suffixAtom, bondToSuffix.getOrder()); - state.fragManager.removeBond(bondToSuffix); - if (parentfragAtom.getIncomingValency()>2 && (suffixValue.equals("aldehyde") || suffixValue.equals("al")|| suffixValue.equals("aldoxime"))){//formaldehyde/methanal are excluded as they are substitutable - if("X".equals(suffixAtom.getFirstLocant())){//carbaldehyde - suffixAtom.setProperty(Atom.ISALDEHYDE, true); - } - else{ - parentfragAtom.setProperty(Atom.ISALDEHYDE, true); - } - } - } - } - } - else{ - throw new ComponentGenerationException("OPSIN bug: Suffix may only have one addgroup rule: " + suffix.getValue()); - } - } else if (suffixRuleTagName.equals(SUFFIXRULES_CHANGECHARGE_EL)) { - int chargeChange = Integer.parseInt(suffixRuleTag.getAttributeValue(SUFFIXRULES_CHARGE_ATR)); - int protonChange = Integer.parseInt(suffixRuleTag.getAttributeValue(SUFFIXRULES_PROTONS_ATR)); - if (suffix.getAttribute(SUFFIXPREFIX_ATR) == null) { - if (idOnParentFragToUse != 0) { - frag.getAtomByIDOrThrow(idOnParentFragToUse).addChargeAndProtons(chargeChange, protonChange); - } - else{ - applyUnlocantedChargeModification(atomList, chargeChange, protonChange); - } - } - else {//a suffix prefixed acylium suffix - if (suffixFrag == null) { - throw new StructureBuildingException("OPSIN bug: ordering of elements in suffixRules.xml wrong; changeCharge found before addGroup"); - } - Set bonds = state.fragManager.getInterFragmentBonds(suffixFrag); - if (bonds.size() != 1) { - throw new StructureBuildingException("OPSIN bug: Wrong number of bonds between suffix and group"); - } - 
for (Bond bond : bonds) { - if (bond.getFromAtom().getFrag() == suffixFrag) { - bond.getFromAtom().addChargeAndProtons(chargeChange, protonChange); - } else { - bond.getToAtom().addChargeAndProtons(chargeChange, protonChange); - } - } - } - } else if (suffixRuleTagName.equals(SUFFIXRULES_SETOUTATOM_EL)) { - int outValency = suffixRuleTag.getAttribute(SUFFIXRULES_OUTVALENCY_ATR) != null ? Integer.parseInt(suffixRuleTag.getAttributeValue(SUFFIXRULES_OUTVALENCY_ATR)) : 1; - if (suffix.getAttribute(SUFFIXPREFIX_ATR) == null) { - if (idOnParentFragToUse != 0) { - frag.addOutAtom(idOnParentFragToUse, outValency, true); - } else { - frag.addOutAtom(firstAtomID, outValency, false); - } - } else {//something like oyl on a ring, which means it is now carbonyl and the outAtom is on the suffix and not frag - if (suffixFrag == null) { - throw new StructureBuildingException("OPSIN bug: ordering of elements in suffixRules.xml wrong; setOutAtom found before addGroup"); - } - Set bonds = state.fragManager.getInterFragmentBonds(suffixFrag); - if (bonds.size() != 1) { - throw new StructureBuildingException("OPSIN bug: Wrong number of bonds between suffix and group"); - } - for (Bond bond : bonds) { - if (bond.getFromAtom().getFrag() == suffixFrag) { - suffixFrag.addOutAtom(bond.getFromAtom(), outValency, true); - } else { - suffixFrag.addOutAtom(bond.getToAtom(), outValency, true); - } - } - } - } else if (suffixRuleTagName.equals(SUFFIXRULES_ADDSUFFIXPREFIXIFNONEPRESENTANDCYCLIC_EL)) { - //already processed - } else if (suffixRuleTagName.equals(SUFFIXRULES_ADDFUNCTIONALATOMSTOHYDROXYGROUPS_EL)) { - //already processed - } else if (suffixRuleTagName.equals(SUFFIXRULES_CHARGEHYDROXYGROUPS_EL)) { - //already processed - } else if (suffixRuleTagName.equals(SUFFIXRULES_REMOVETERMINALOXYGEN_EL)) { - //already processed - } else if (suffixRuleTagName.equals(SUFFIXRULES_CONVERTHYDROXYGROUPSTOOUTATOMS_EL)) { - //already processed - } else if 
(suffixRuleTagName.equals(SUFFIXRULES_CONVERTHYDROXYGROUPSTOPOSITIVECHARGE_EL)) { - //already processed - } else { - throw new StructureBuildingException("Unknown suffix rule:" + suffixRuleTagName); - } - } - - if (suffixFrag != null) {//merge suffix frag and parent fragment - state.fragManager.removeAtomAndAssociatedBonds(suffixFrag.getFirstAtom());//the dummy R atom - Set suffixLocants = new HashSet(suffixFrag.getLocants()); - for (String suffixLocant : suffixLocants) { - if (Character.isDigit(suffixLocant.charAt(0))){//check that numeric locants do not conflict with the parent fragment e.g. hydrazide 2' with biphenyl 2' - if (frag.hasLocant(suffixLocant)){ - suffixFrag.getAtomByLocant(suffixLocant).removeLocant(suffixLocant); - } - } - } - state.fragManager.incorporateFragment(suffixFrag, frag); - if (CYCLEFORMER_SUBTYPE_VAL.equals(suffix.getAttributeValue(SUBTYPE_ATR))){ - CycleDetector.assignWhetherAtomsAreInCycles(frag); - } - } - } - } - - - private void processCycleFormingSuffix(Fragment suffixFrag, Fragment suffixableFragment, Element suffix) throws StructureBuildingException, ComponentGenerationException { - List rAtoms = new ArrayList(); - for (Atom a : suffixFrag.getAtomList()) { - if (a.getElement().equals("R")){ - rAtoms.add(a); - } - } - if (rAtoms.size() != 2){ - throw new ComponentGenerationException("OPSIN bug: Incorrect number of R atoms associated with cyclic suffix"); - } - if (rAtoms.get(0).getBonds().size() <= 0 || rAtoms.get(1).getBonds().size() <= 0) { - throw new ComponentGenerationException("OPSIN Bug: Dummy atoms in suffix should have at least one bond to them"); - } - - Atom parentAtom1; - Atom parentAtom2; - - String locant = suffix.getAttributeValue(LOCANT_ATR); - String locantId = suffix.getAttributeValue(LOCANTID_ATR); - if (locant != null){ - String[] locants = MATCH_COMMA.split(locant); - if (locants.length ==2){ - parentAtom1 = suffixableFragment.getAtomByLocantOrThrow(locants[0]); - parentAtom2 = 
suffixableFragment.getAtomByLocantOrThrow(locants[1]); - } - else if (locants.length ==1){ - parentAtom1 = suffixableFragment.getAtomByLocantOrThrow("1"); - parentAtom2 = suffixableFragment.getAtomByLocantOrThrow(locants[0]); - } - else{ - throw new ComponentGenerationException("Incorrect number of locants associated with cycle forming suffix, expected 2 found: " + locants.length); - } - } - else if (locantId !=null) { - String[] locantIds = MATCH_COMMA.split(locantId); - if (locantIds.length !=2){ - throw new ComponentGenerationException("OPSIN bug: Should be exactly 2 locants associated with a cyclic suffix"); - } - int firstIdInFragment = suffixableFragment.getIdOfFirstAtom(); - parentAtom1 = suffixableFragment.getAtomByIDOrThrow(firstIdInFragment + Integer.parseInt(locantIds[0]) -1); - parentAtom2 = suffixableFragment.getAtomByIDOrThrow(firstIdInFragment + Integer.parseInt(locantIds[1]) -1); - } - else{ - int chainLength = suffixableFragment.getChainLength(); - if (chainLength > 1 && chainLength == suffixableFragment.getAtomList().size()){ - parentAtom1 = suffixableFragment.getAtomByLocantOrThrow("1"); - parentAtom2 = suffixableFragment.getAtomByLocantOrThrow(String.valueOf(chainLength)); - } - else{ - throw new ComponentGenerationException("cycle forming suffix: " + suffix.getValue() +" should be locanted!"); - } - } - if (parentAtom1.equals(parentAtom2)){ - throw new ComponentGenerationException("cycle forming suffix: " + suffix.getValue() +" attempted to form a cycle involving the same atom twice!"); - } - - if (parentAtom2.getElement().equals("O")){//cyclic suffixes like lactone formally indicate the removal of hydroxy cf. 
1979 rule 472.1 - //...although in most cases they are used on structures that don't actually have a hydroxy group - List neighbours = parentAtom2.getAtomNeighbours(); - if (neighbours.size()==1){ - List suffixNeighbours = rAtoms.get(1).getAtomNeighbours(); - if (suffixNeighbours.size()==1 && suffixNeighbours.get(0).getElement().equals("O")){ - state.fragManager.removeAtomAndAssociatedBonds(parentAtom2); - parentAtom2 = neighbours.get(0); - } - } - } - - makeBondsToSuffix(parentAtom1, rAtoms.get(0)); - makeBondsToSuffix(parentAtom2, rAtoms.get(1)); - state.fragManager.removeAtomAndAssociatedBonds(rAtoms.get(1)); - } - - /** - * Creates bonds between the parentAtom and the atoms connected to the R atoms. - * Removes bonds to the R atom - * @param parentAtom - * @param suffixRAtom - */ - private void makeBondsToSuffix(Atom parentAtom, Atom suffixRAtom) { - List bonds = new ArrayList(suffixRAtom.getBonds()); - for (Bond bondToSuffix : bonds) { - Atom suffixAtom = bondToSuffix.getOtherAtom(suffixRAtom); - state.fragManager.createBond(parentAtom, suffixAtom, bondToSuffix.getOrder()); - state.fragManager.removeBond(bondToSuffix); - } - } - - /** - * Preference is given to mono cation/anions as they are expected to be more likely - * Additionally, Typically if a locant has not been specified then it was intended to refer to a nitrogen even if the nitrogen is not at locant 1 e.g. 
isoquinolinium - * Hence preference is given to nitrogen atoms and then to non carbon atoms - * @param atomList - * @param chargeChange - * @param protonChange - */ - private void applyUnlocantedChargeModification(List atomList, int chargeChange, int protonChange) { - Atom likelyAtom = null; - Atom possibleHeteroatom = null; - Atom possibleCarbonAtom = null; - Atom possibleDiOrHigherIon = null; - for (Atom a : atomList) { - Integer[] stableValencies = ValencyChecker.getPossibleValencies(a.getElement(), a.getCharge() + chargeChange); - if (stableValencies == null) {//unstable valency so seems unlikely - continue; - } - String element = a.getElement(); - int resultantExpectedValency = (a.getLambdaConventionValency() ==null ? ValencyChecker.getDefaultValency(element) : a.getLambdaConventionValency()) + a.getProtonsExplicitlyAddedOrRemoved() + protonChange; - boolean matched = false; - for (Integer stableValency : stableValencies) { - if (stableValency ==resultantExpectedValency){ - matched =true; - break; - } - } - if (!matched){//unstable valency so seems unlikely - continue; - } - if (protonChange <0 && StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(a)<=0){ - continue; - } - if (Math.abs(a.getCharge())==0){ - if (element.equals("N")){ - likelyAtom = a; - break; - } - else if (possibleHeteroatom ==null && !element.equals("C")){ - possibleHeteroatom= a; - } - else if (possibleCarbonAtom ==null){ - possibleCarbonAtom = a; - } - } - else if (possibleDiOrHigherIon ==null){ - possibleDiOrHigherIon = a; - } - } - if (likelyAtom == null) { - if (possibleHeteroatom !=null){ - likelyAtom = possibleHeteroatom; - } - else if (possibleCarbonAtom !=null){ - likelyAtom = possibleCarbonAtom; - } - else if (possibleDiOrHigherIon !=null){ - likelyAtom = possibleDiOrHigherIon; - } - else{ - likelyAtom = atomList.get(0); - } - } - likelyAtom.addChargeAndProtons(chargeChange, protonChange); - } /** * Converts a biochemical linkage description e.g. 
(1->4) into an O[1-9] locant @@ -4468,7 +4662,7 @@ */ private void processBiochemicalLinkageDescriptors(List substituents, List brackets) throws StructureBuildingException { for (Element substituent : substituents) { - List bioLinkLocants = XOMTools.getChildElementsWithTagName(substituent, BIOCHEMICALLINKAGE_EL); + List bioLinkLocants = substituent.getChildElements(BIOCHEMICALLINKAGE_EL); if (bioLinkLocants.size() > 0){ if (bioLinkLocants.size() > 1){ throw new RuntimeException("OPSIN Bug: More than 1 biochemical linkage locant associated with subsituent"); @@ -4479,14 +4673,11 @@ checkAndApplyFirstLocantOfBiochemicalLinkage(substituent, bioLinkLocantStr); int secondLocantStartPos = Math.max(bioLinkLocantStr.lastIndexOf('>'), bioLinkLocantStr.lastIndexOf('-')) + 1; String locantToConnectTo = bioLinkLocantStr.substring(secondLocantStartPos); - Element parent = (Element) substituent.getParent(); + Element parent = substituent.getParent(); Attribute locantAtr = new Attribute(LOCANT_ATR, "O" + locantToConnectTo); - Element elementAfterSubstituent = (Element) XOMTools.getNextSibling(substituent); - boolean hasAdjacentGroupToSubstitute = (elementAfterSubstituent !=null && - (elementAfterSubstituent.getLocalName().equals(SUBSTITUENT_EL) || - elementAfterSubstituent.getLocalName().equals(BRACKET_EL) || - elementAfterSubstituent.getLocalName().equals(ROOT_EL))); + Element elementAfterSubstituent = OpsinTools.getNextSibling(substituent); + boolean hasAdjacentGroupToSubstitute = isSubBracketOrRoot(elementAfterSubstituent); /* If a biochemical is not at the end of a scope but is preceded by substituents/brackets @@ -4495,44 +4686,47 @@ * Else the locant is assigned to the substituent */ boolean bracketAdded =false; - if (hasAdjacentGroupToSubstitute){ + if (hasAdjacentGroupToSubstitute) { //now find the brackets/substituents before this element - Element previous = (Element) XOMTools.getPreviousSibling(substituent); + Element previous = 
OpsinTools.getPreviousSibling(substituent); List previousElements = new ArrayList(); - while( previous !=null){ - if (!previous.getLocalName().equals(SUBSTITUENT_EL) && !previous.getLocalName().equals(BRACKET_EL)){ + while( previous != null) { + if (!previous.getName().equals(SUBSTITUENT_EL) && !previous.getName().equals(BRACKET_EL)) { break; } previousElements.add(previous); - previous = (Element) XOMTools.getPreviousSibling(previous); + previous = OpsinTools.getPreviousSibling(previous); } - if (previousElements.size() > 0 ){//an explicit bracket is needed + if (previousElements.size() > 0) {//an explicit bracket is needed Collections.reverse(previousElements); - Element bracket = new Element(BRACKET_EL); + Element bracket = new GroupingEl(BRACKET_EL); bracket.addAttribute(locantAtr); int indexToInsertAt = parent.indexOf(previousElements.get(0)); for (Element element : previousElements) { element.detach(); - bracket.appendChild(element); + bracket.addChild(element); } substituent.detach(); - bracket.appendChild(substituent); + bracket.addChild(substituent); parent.insertChild(bracket, indexToInsertAt); brackets.add(bracket); bracketAdded = true; + if (substituent.getAttribute(LOCANT_ATR) != null) { + throw new StructureBuildingException("Substituent with biochemical linkage descriptor should not also have a locant: " + substituent.getAttributeValue(LOCANT_ATR)); + } } } if (!bracketAdded) { Element elToAddAtrTo; - if (parent.getLocalName().equals(BRACKET_EL) && !hasAdjacentGroupToSubstitute){ + if (parent.getName().equals(BRACKET_EL) && !hasAdjacentGroupToSubstitute) { elToAddAtrTo = parent; } else{ elToAddAtrTo = substituent; } - if (elToAddAtrTo.getAttribute(LOCANT_ATR) !=null){ + if (elToAddAtrTo.getAttribute(LOCANT_ATR) !=null) { throw new StructureBuildingException("Substituent with biochemical linkage descriptor should not also have a locant: " + elToAddAtrTo.getAttributeValue(LOCANT_ATR)); } elToAddAtrTo.addAttribute(locantAtr); @@ -4542,14 +4736,14 @@ } 
for (Element bracket : brackets) { - List bioLinkLocants = XOMTools.getChildElementsWithTagName(bracket, BIOCHEMICALLINKAGE_EL); + List bioLinkLocants = bracket.getChildElements(BIOCHEMICALLINKAGE_EL); if (bioLinkLocants.size() > 0){ - if (bioLinkLocants.size() > 1){ + if (bioLinkLocants.size() > 1) { throw new RuntimeException("OPSIN Bug: More than 1 biochemical linkage locant associated with bracket"); } Element bioLinkLocant = bioLinkLocants.get(0); - Element substituent = (Element) XOMTools.getPreviousSibling(bioLinkLocant); - if (substituent == null || !substituent.getLocalName().equals(SUBSTITUENT_EL)){ + Element substituent = OpsinTools.getPreviousSibling(bioLinkLocant); + if (substituent == null || !substituent.getName().equals(SUBSTITUENT_EL)){ throw new RuntimeException("OPSIN Bug: Substituent expected before biochemical linkage locant"); } String bioLinkLocantStr = bioLinkLocant.getValue(); @@ -4566,9 +4760,16 @@ } } + private boolean isSubBracketOrRoot(Element element) { + return element !=null && + (element.getName().equals(SUBSTITUENT_EL) || + element.getName().equals(BRACKET_EL) || + element.getName().equals(ROOT_EL)); + } + private void checkAndApplyFirstLocantOfBiochemicalLinkage(Element substituent, String biochemicalLinkage) throws StructureBuildingException { Element group = substituent.getFirstChildElement(GROUP_EL); - Fragment frag = state.xmlFragmentMap.get(group); + Fragment frag = group.getFrag(); String firstLocant = biochemicalLinkage.substring(0, biochemicalLinkage.indexOf('-')); if (group.getAttributeValue(TYPE_ATR).equals(CARBOHYDRATE_TYPE_VAL)) { Atom anomericAtom = frag.getAtomByLocantOrThrow(firstLocant); @@ -4584,7 +4785,7 @@ } else{ Atom positionOfPhospho = frag.getAtomByLocantOrThrow("O" + firstLocant); - if (positionOfPhospho.getBonds().size() !=1){ + if (positionOfPhospho.getBondCount() !=1){ throw new StructureBuildingException(firstLocant + " should be the carbon to which a hydroxy group is attached!"); } if 
(frag.getOutAtomCount()==1){ @@ -4611,33 +4812,33 @@ private void moveErroneouslyPositionedLocantsAndMultipliers(List brackets) { for (int i = brackets.size()-1; i >=0; i--) { Element bracket =brackets.get(i); - Elements childElements = bracket.getChildElements(); + List childElements = bracket.getChildElements(); boolean hyphenPresent = false; int childCount = childElements.size(); if (childCount==2){ for (int j = childCount -1; j >=0; j--) { - if (childElements.get(j).getLocalName().equals(HYPHEN_EL)){ + if (childElements.get(j).getName().equals(HYPHEN_EL)){ hyphenPresent=true; } } } if (childCount==1 || hyphenPresent && childCount==2){ - Elements substituentContent = childElements.get(0).getChildElements(); + List substituentContent = childElements.get(0).getChildElements(); if (substituentContent.size()>=2){ Element locant =null; Element multiplier =null; Element possibleMultiplier = substituentContent.get(0); - if (substituentContent.get(0).getLocalName().equals(LOCANT_EL)){//probably erroneous locant + if (substituentContent.get(0).getName().equals(LOCANT_EL)){//probably erroneous locant locant = substituentContent.get(0); possibleMultiplier = substituentContent.get(1); } - if (possibleMultiplier.getLocalName().equals(MULTIPLIER_EL)){//erroneously placed multiplier present + if (possibleMultiplier.getName().equals(MULTIPLIER_EL)){//erroneously placed multiplier present multiplier = possibleMultiplier; } if (locant!=null){ - if (multiplier==null || MATCH_COMMA.split(locant.getValue()).length == Integer.parseInt(multiplier.getAttributeValue(VALUE_ATR))){ + if (multiplier==null || locant.getValue().split(",").length == Integer.parseInt(multiplier.getAttributeValue(VALUE_ATR))){ locant.detach(); - XOMTools.insertBefore(childElements.get(0), locant); + OpsinTools.insertBefore(childElements.get(0), locant); } else{ continue; @@ -4645,12 +4846,51 @@ } if (multiplier !=null){ multiplier.detach(); - XOMTools.insertBefore(childElements.get(0), multiplier); + 
OpsinTools.insertBefore(childElements.get(0), multiplier); } } } } } + + + /** + * Checks for case where the term is a substituent that starts with two multipliers + * Interprets the first as a word level multiplier and the second as a substituent multiplier by adding an implicit bracket + * @param substituent + * @param brackets + */ + private void addImplicitBracketsWhenFirstSubstituentHasTwoMultipliers(Element substituent, List brackets) { + if (!substituent.getName().equals(SUBSTITUENT_EL)) { + return; + } + List multipliers = new ArrayList(); + for (int i = 0, len = substituent.getChildCount(); i < len; i++) { + Element child = substituent.getChild(i); + if (child.getName().equals(MULTIPLIER_EL)) { + multipliers.add(child); + } + else { + break; + } + } + if (multipliers.size() != 2) { + return; + } + Element bracket = new GroupingEl(BRACKET_EL); + bracket.addAttribute(new Attribute(TYPE_ATR, IMPLICIT_TYPE_VAL)); + Element parent = substituent.getParent(); + List elsToAddToBracket = parent.getChildElements(); + Element wordMultiplier = multipliers.get(0); + wordMultiplier.detach(); + bracket.addChild(wordMultiplier); + for (Element el : elsToAddToBracket) { + el.detach(); + bracket.addChild(el); + } + parent.addChild(bracket); + brackets.add(bracket); + } /** @@ -4663,13 +4903,13 @@ * @throws StructureBuildingException */ private void assignLocantsToMultipliedRootIfPresent(Element rightMostElement) throws ComponentGenerationException, StructureBuildingException { - Elements multipliers = rightMostElement.getChildElements(MULTIPLIER_EL); + List multipliers = rightMostElement.getChildElements(MULTIPLIER_EL); if(multipliers.size() == 1) { Element multiplier =multipliers.get(0); - if (XOMTools.getPrevious(multiplier)==null){ + if (OpsinTools.getPrevious(multiplier)==null){ throw new StructureBuildingException("OPSIN bug: Unacceptable input to function"); } - List locants = XOMTools.getChildElementsWithTagName(rightMostElement, MULTIPLICATIVELOCANT_EL); + List 
locants = rightMostElement.getChildElements(MULTIPLICATIVELOCANT_EL); if (locants.size()>1){ throw new ComponentGenerationException("OPSIN bug: Only none or one multiplicative locant expected"); } @@ -4679,7 +4919,7 @@ } else{ Element locantEl = locants.get(0); - String[] locantValues = MATCH_COMMA.split(locantEl.getValue()); + String[] locantValues = locantEl.getValue().split(","); if (locantValues.length == multiVal){ rightMostElement.addAttribute(new Attribute(INLOCANTS_ATR, locantEl.getValue())); locantEl.detach(); @@ -4689,8 +4929,8 @@ } } } - else if (rightMostElement.getLocalName().equals(BRACKET_EL)){ - assignLocantsToMultipliedRootIfPresent(((Element) rightMostElement.getChild(rightMostElement.getChildCount()-1))); + else if (rightMostElement.getName().equals(BRACKET_EL)){ + assignLocantsToMultipliedRootIfPresent(rightMostElement.getChild(rightMostElement.getChildCount()-1)); } } @@ -4699,33 +4939,32 @@ * Adds an implicit bracket in the case where two locants have been given. * One for the locanting of substituent on to the next substituent and one * for the locanting of this combined substituent onto a parent group - * @param substituents + * e.g. 5-p-hydroxyphenyl-1,2-dithiole-3-thione --> e.g. 5-(p-hydroxyphenyl)-1,2-dithiole-3-thione + * @param substituent * @param brackets */ - private void addImplicitBracketsInCaseWhereSubstituentHasTwoLocants(List substituents, List brackets) { - for (Element substituent : substituents) { - Element siblingSubstituent = (Element) XOMTools.getNextSibling(substituent); - if (siblingSubstituent !=null && siblingSubstituent.getLocalName().equals(SUBSTITUENT_EL)){ - List locants = getLocantsAtStartOfSubstituent(substituent); - if (locants.size() ==2 && locantsAreSingular(locants) - && getLocantsAtStartOfSubstituent(siblingSubstituent).size()==0){//e.g. 
5-p-hydroxyphenyl-1,2-dithiole-3-thione - Element bracket = new Element(BRACKET_EL); - bracket.addAttribute(new Attribute(TYPE_ATR, IMPLICIT_TYPE_VAL)); - Element parent = (Element) substituent.getParent(); - int indexToInsertAt = parent.indexOf(substituent); - int elementsToMove = substituent.indexOf(locants.get(0))+1; - for (int i = 0; i < elementsToMove; i++) { - Element locantOrStereoToMove =(Element) substituent.getChild(0); - locantOrStereoToMove.detach(); - bracket.appendChild(locantOrStereoToMove); - } - substituent.detach(); - siblingSubstituent.detach(); - bracket.appendChild(substituent); - bracket.appendChild(siblingSubstituent); - parent.insertChild(bracket, indexToInsertAt); - brackets.add(bracket); - } + private void addImplicitBracketsWhenSubstituentHasTwoLocants(Element substituent, List brackets) { + Element siblingSubstituent = OpsinTools.getNextSibling(substituent); + if (siblingSubstituent != null && siblingSubstituent.getName().equals(SUBSTITUENT_EL)) { + List locants = getLocantsAtStartOfSubstituent(substituent); + if (locants.size() == 2 && locantsAreSingular(locants) + && getLocantsAtStartOfSubstituent(siblingSubstituent).size() == 0) { + Element bracket = new GroupingEl(BRACKET_EL); + bracket.addAttribute(new Attribute(TYPE_ATR, IMPLICIT_TYPE_VAL)); + Element parent = substituent.getParent(); + int indexToInsertAt = parent.indexOf(substituent); + int elementsToMove = substituent.indexOf(locants.get(0)) + 1; + for (int i = 0; i < elementsToMove; i++) { + Element locantOrStereoToMove = substituent.getChild(0); + locantOrStereoToMove.detach(); + bracket.addChild(locantOrStereoToMove); + } + substituent.detach(); + siblingSubstituent.detach(); + bracket.addChild(substituent); + bracket.addChild(siblingSubstituent); + parent.insertChild(bracket, indexToInsertAt); + brackets.add(bracket); } } } @@ -4737,11 +4976,11 @@ */ private List getLocantsAtStartOfSubstituent(Element substituent) { List locants = new ArrayList(); - Elements children = 
substituent.getChildElements(); - for (int i = 0; i < children.size(); i++) { - String currentElementName = children.get(i).getLocalName(); + for (int i = 0, len = substituent.getChildCount(); i < len; i++) { + Element child = substituent.getChild(i); + String currentElementName = child.getName(); if (currentElementName.equals(LOCANT_EL)){ - locants.add(children.get(i)); + locants.add(child); } else if (currentElementName.equals(STEREOCHEMISTRY_EL)){ //ignore @@ -4760,7 +4999,7 @@ */ private boolean locantsAreSingular(List locants) { for (Element locant : locants) { - if (MATCH_COMMA.split(locant.getValue()).length > 1){ + if (locant.getValue().split(",").length > 1){ return false; } } @@ -4778,33 +5017,33 @@ * @throws StructureBuildingException */ private void assignLocantsAndMultipliers(Element subOrBracket) throws ComponentGenerationException, StructureBuildingException { - List locants = XOMTools.getChildElementsWithTagName(subOrBracket, LOCANT_EL); - int multiplier =1; - List multipliers = XOMTools.getChildElementsWithTagName(subOrBracket, MULTIPLIER_EL); - Element parentElem =(Element)subOrBracket.getParent(); - boolean oneBelowWordLevel = parentElem.getLocalName().equals(WORD_EL); + List locants = subOrBracket.getChildElements(LOCANT_EL); + int multiplier = 1; + List multipliers = subOrBracket.getChildElements(MULTIPLIER_EL); + Element parentElem = subOrBracket.getParent(); + boolean oneBelowWordLevel = parentElem.getName().equals(WORD_EL); Element groupIfPresent = subOrBracket.getFirstChildElement(GROUP_EL); - if (multipliers.size()>0){ - if (multipliers.size()>1){ - throw new ComponentGenerationException(subOrBracket.getLocalName() +" has multiple multipliers, unable to determine meaning!"); + if (multipliers.size() > 0) { + if (multipliers.size() > 1){ + throw new ComponentGenerationException(subOrBracket.getName() +" has multiple multipliers, unable to determine meaning!"); } if (oneBelowWordLevel && - XOMTools.getNextSibling(subOrBracket) == null && - 
XOMTools.getPreviousSibling(subOrBracket) == null) { + OpsinTools.getNextSibling(subOrBracket) == null && + OpsinTools.getPreviousSibling(subOrBracket) == null) { return;//word level multiplier } multiplier = Integer.parseInt(multipliers.get(0).getAttributeValue(VALUE_ATR)); subOrBracket.addAttribute(new Attribute(MULTIPLIER_ATR, multipliers.get(0).getAttributeValue(VALUE_ATR))); - //multiplier is INTENTIONALLY not detached. As brackets/subs are only multiplied later on it is neccesary at that stage to determine what elements (if any) are in front of the multiplier + //multiplier is INTENTIONALLY not detached. As brackets/subs are only multiplied later on it is necessary at that stage to determine what elements (if any) are in front of the multiplier if (groupIfPresent !=null && PERHALOGENO_SUBTYPE_VAL.equals(groupIfPresent.getAttributeValue(SUBTYPE_ATR))){ throw new StructureBuildingException(groupIfPresent.getValue() +" cannot be multiplied"); } } if(locants.size() > 0) { - if (multiplier==1 && oneBelowWordLevel && XOMTools.getPreviousSibling(subOrBracket)==null){//locant might be word Level locant + if (multiplier==1 && oneBelowWordLevel && OpsinTools.getPreviousSibling(subOrBracket)==null){//locant might be word Level locant if (wordLevelLocantsAllowed(subOrBracket, locants.size())){//something like S-ethyl or S-(2-ethylphenyl) or S-4-tert-butylphenyl Element locant = locants.remove(0); - if (MATCH_COMMA.split(locant.getValue()).length!=1){ + if (locant.getValue().split(",").length!=1){ throw new ComponentGenerationException("Multiplier and locant count failed to agree; All locants could not be assigned!"); } parentElem.addAttribute(new Attribute(LOCANT_ATR, locant.getValue())); @@ -4814,26 +5053,26 @@ } } } - if (subOrBracket.getLocalName().equals(ROOT_EL)){ + if (subOrBracket.getName().equals(ROOT_EL)){ locantsToDebugString(locants); throw new ComponentGenerationException(locantsToDebugString(locants)); } - if (locants.size()!=1){ + if (locants.size() != 1){ 
throw new ComponentGenerationException(locantsToDebugString(locants)); } Element locantEl = locants.get(0); - String[] locantValues = MATCH_COMMA.split(locantEl.getValue()); + String[] locantValues = locantEl.getValue().split(","); if (multiplier != locantValues.length){ throw new ComponentGenerationException("Multiplier and locant count failed to agree; All locants could not be assigned!"); } - Element parent =(Element) subOrBracket.getParent(); + Element parent = subOrBracket.getParent(); //attempt to find cases where locant will not be utilised. A special case is made for carbonyl derivatives //e.g. 1H-2-benzopyran-1,3,4-trione 4-[N-(4-chlorophenyl)hydrazone] - if (!parent.getLocalName().equals(WORD_EL) || !parent.getAttributeValue(TYPE_ATR).equals(WordType.full.toString()) || !state.currentWordRule.equals(WordRule.carbonylDerivative)){ - Elements children =parent.getChildElements(); + if (!parent.getName().equals(WORD_EL) || !parent.getAttributeValue(TYPE_ATR).equals(WordType.full.toString()) || !state.currentWordRule.equals(WordRule.carbonylDerivative)){ + List children =parent.getChildElements(); boolean foundSomethingToSubstitute =false; for (int i = parent.indexOf(subOrBracket) +1 ; i < children.size(); i++) { - if (!children.get(i).getLocalName().equals(HYPHEN_EL)){ + if (!children.get(i).getName().equals(HYPHEN_EL)){ foundSomethingToSubstitute = true; } } @@ -4849,11 +5088,11 @@ } } - private String locantsToDebugString(List locants) { - StringBuilder message = new StringBuilder("Unable to assign all locants. "); - message.append((locants.size() > 1) ? "These locants " : "This locant "); - message.append((locants.size() > 1) ? "were " : "was "); - message.append("not assigned: "); + private String locantsToDebugString(List locants) { + StringBuilder message = new StringBuilder("Unable to assign all locants. "); + message.append((locants.size() > 1) ? "These locants " : "This locant "); + message.append((locants.size() > 1) ? 
"were " : "was "); + message.append("not assigned: "); for(Element locant : locants) { message.append(locant.getValue()); message.append(" "); @@ -4863,25 +5102,32 @@ - private boolean wordLevelLocantsAllowed(Element subOrBracket, int numberOflocants) { - Element parentElem =(Element)subOrBracket.getParent(); + private boolean wordLevelLocantsAllowed(Element subBracketOrRoot, int numberOflocants) { + Element parentElem = subBracketOrRoot.getParent(); if (WordType.valueOf(parentElem.getAttributeValue(TYPE_ATR))==WordType.substituent - && (XOMTools.getNextSibling(subOrBracket)==null || numberOflocants>=2)){ + && (OpsinTools.getNextSibling(subBracketOrRoot)==null || numberOflocants>=2)){ if (state.currentWordRule == WordRule.ester || state.currentWordRule == WordRule.functionalClassEster || state.currentWordRule == WordRule.multiEster || state.currentWordRule == WordRule.acetal){ return true; } } - if ((state.currentWordRule == WordRule.potentialBiochemicalEster || - (state.currentWordRule == WordRule.ester && (XOMTools.getNextSibling(subOrBracket)==null || numberOflocants>=2))) - && parentElem.getLocalName().equals(WORD_EL)){ - Element wordRule = (Element) parentElem.getParent(); - Elements words = wordRule.getChildElements(WORD_EL); + if ((state.currentWordRule == WordRule.potentialAlcoholEster || state.currentWordRule == WordRule.amineDiConjunctiveSuffix || + (state.currentWordRule == WordRule.ester && (OpsinTools.getNextSibling(subBracketOrRoot)==null || numberOflocants>=2))) + && parentElem.getName().equals(WORD_EL)){ + Element wordRule = parentElem.getParent(); + List words = wordRule.getChildElements(WORD_EL); Element ateWord = words.get(words.size()-1); if (parentElem==ateWord){ return true; } } - + if (state.currentWordRule == WordRule.acidReplacingFunctionalGroup && parentElem.getName().equals(WORD_EL) && + (OpsinTools.getNextSibling(subBracketOrRoot)==null || numberOflocants>=2)) { + //e.g. 
diphosphoric acid 1,3-di(ethylamide) + if (parentElem.getParent().indexOf(parentElem) > 0){ + return true; + } + } + return false; } @@ -4889,69 +5135,83 @@ * If a word level multiplier is present e.g. diethyl butandioate then this is processed to ethyl ethyl butandioate * If wordCount is 1 then an exception is thrown if a multiplier is encountered e.g. triphosgene parsed as tri phosgene * @param word + * @param roots * @param wordCount * @throws StructureBuildingException * @throws ComponentGenerationException */ - private void processWordLevelMultiplierIfApplicable(Element word, int wordCount) throws StructureBuildingException, ComponentGenerationException { - if (word.getChildCount()==1){ - Element subOrBracket = (Element) word.getChild(0); - Element multiplier = subOrBracket.getFirstChildElement(MULTIPLIER_EL); - if (multiplier !=null){ - int multiVal =Integer.parseInt(multiplier.getAttributeValue(VALUE_ATR)); - Elements locants =subOrBracket.getChildElements(LOCANT_EL); - boolean assignLocants =false; - boolean wordLevelLocants = wordLevelLocantsAllowed(subOrBracket, locants.size());//something like O,S-dimethyl phosphorothioate - if (locants.size()>1){ + private void processWordLevelMultiplierIfApplicable(Element word, List roots, int wordCount) throws StructureBuildingException, ComponentGenerationException { + if (word.getChildCount() == 1){ + Element firstSubBrackOrRoot = word.getChild(0); + Element multiplier = firstSubBrackOrRoot.getFirstChildElement(MULTIPLIER_EL); + if (multiplier != null) { + int multiVal = Integer.parseInt(multiplier.getAttributeValue(VALUE_ATR)); + List locants = firstSubBrackOrRoot.getChildElements(LOCANT_EL); + boolean assignLocants = false; + boolean wordLevelLocants = wordLevelLocantsAllowed(firstSubBrackOrRoot, locants.size());//something like O,S-dimethyl phosphorothioate + if (locants.size() > 1) { throw new ComponentGenerationException("Unable to assign all locants"); } String[] locantValues = null; - if (locants.size()==1){ 
- locantValues = MATCH_COMMA.split(locants.get(0).getValue()); + if (locants.size() == 1) { + locantValues = locants.get(0).getValue().split(","); if (locantValues.length == multiVal){ - assignLocants=true; + assignLocants = true; locants.get(0).detach(); if (wordLevelLocants){ word.addAttribute(new Attribute(LOCANT_ATR, locantValues[0])); } else{ - throw new ComponentGenerationException(locantsToDebugString(OpsinTools.elementsToElementArrayList(locants))); + throw new ComponentGenerationException(locantsToDebugString(locants)); } } else{ throw new ComponentGenerationException("Unable to assign all locants"); } } - if (multiplier.getValue().equals("non")){ - throw new StructureBuildingException("\"non\" probably means \"not\". If a multiplier of value 9 was intended \"nona\" should be used"); - } - if (wordCount ==1){ + checkForNonConfusedWithNona(multiplier); + if (wordCount == 1) { if (!isMonoFollowedByElement(multiplier, multiVal)){ throw new StructureBuildingException("Unexpected multiplier found at start of word. Perhaps the name is trivial e.g. 
triphosgene"); } } - if (multiVal ==1){//mono + if (multiVal == 1) {//mono return; } List elementsNotToBeMultiplied = new ArrayList();//anything before the multiplier - for (int i = subOrBracket.indexOf(multiplier) -1 ; i >=0 ; i--) { - Element el = (Element) subOrBracket.getChild(i); + for (int i = firstSubBrackOrRoot.indexOf(multiplier) -1 ; i >=0 ; i--) { + Element el = firstSubBrackOrRoot.getChild(i); el.detach(); elementsNotToBeMultiplied.add(el); } multiplier.detach(); - for(int i=multiVal -1; i>=1; i--) { + for(int i= multiVal -1; i>=1; i--) { Element clone = state.fragManager.cloneElement(state, word); if (assignLocants){ clone.getAttribute(LOCANT_ATR).setValue(locantValues[i]); } - XOMTools.insertAfter(word, clone); + OpsinTools.insertAfter(word, clone); } for (Element el : elementsNotToBeMultiplied) {//re-add anything before multiplier to original word - subOrBracket.insertChild(el, 0); + firstSubBrackOrRoot.insertChild(el, 0); } } } + else if (roots.size() == 1) { + if (OpsinTools.getDescendantElementsWithTagName(roots.get(0), FRACTIONALMULTIPLIER_EL).size() > 0){ + throw new StructureBuildingException("Unexpected fractional multiplier found within chemical name"); + } + } + } + + private void checkForNonConfusedWithNona(Element multiplier) throws StructureBuildingException { + if (multiplier.getValue().equals("non")){ + String subsequentUnsemanticToken = multiplier.getAttributeValue(SUBSEQUENTUNSEMANTICTOKEN_ATR); + if (subsequentUnsemanticToken !=null && subsequentUnsemanticToken.toLowerCase(Locale.ROOT).startsWith("a")){ + return; + } + throw new StructureBuildingException("\"non\" probably means \"not\". 
If a multiplier of value 9 was intended \"nona\" should be used"); + } } /** @@ -4962,8 +5222,8 @@ */ private boolean isMonoFollowedByElement(Element multiplier, int multiVal) { if (multiVal ==1){ - Element possibleElement = (Element) XOMTools.getNextSibling(multiplier); - if (possibleElement != null && possibleElement.getLocalName().equals(GROUP_EL) && + Element possibleElement = OpsinTools.getNextSibling(multiplier); + if (possibleElement != null && possibleElement.getName().equals(GROUP_EL) && (ELEMENTARYATOM_SUBTYPE_VAL.equals(possibleElement.getAttributeValue(SUBTYPE_ATR)) || possibleElement.getValue().equals("hydrogen"))){ return true; } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CycleDetector.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CycleDetector.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CycleDetector.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CycleDetector.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,8 +1,9 @@ package uk.ac.cam.ch.wwmm.opsin; +import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.Deque; import java.util.LinkedHashSet; -import java.util.LinkedList; import java.util.List; import java.util.Set; @@ -20,52 +21,73 @@ * @param frag */ static void assignWhetherAtomsAreInCycles(Fragment frag) { - List atomList =frag.getAtomList(); + List atomList = frag.getAtomList(); for (Atom atom : atomList) { atom.setAtomIsInACycle(false); atom.setProperty(Atom.VISITED, null); } for (Atom a : atomList) {//as OPSIN does not disallow disconnected sections within a single "fragment" (e.g. 
in suffixes) for vigorousness this for loop is required - if(a.getProperty(Atom.VISITED)==null){//typically for all but the first atom this will be true + if(a.getProperty(Atom.VISITED) == null){//true for only the first atom in a fully connected molecule traverseRings(a, null, 0); } } } private static int traverseRings(Atom currentAtom, Atom previousAtom, int depth){ - if(currentAtom.getProperty(Atom.VISITED)!=null){ - return currentAtom.getProperty(Atom.VISITED); + Integer previouslyAssignedDepth = currentAtom.getProperty(Atom.VISITED); + if(previouslyAssignedDepth != null){ + return previouslyAssignedDepth; } currentAtom.setProperty(Atom.VISITED, depth); - int result = depth+1; - List neighbours = currentAtom.getAtomNeighbours(); - for (Atom neighbour : neighbours) { - if (neighbour.equals(previousAtom)){ - continue; + List equivalentAtoms = new ArrayList(); + equivalentAtoms.add(currentAtom); + + List neighbours; + for(;;) { + //Non-recursively process atoms in a chain + //add the atoms in the chain to equivalentAtoms as either all or none of them are in a ring + neighbours = currentAtom.getAtomNeighbours(); + neighbours.remove(previousAtom); + if (neighbours.size() != 1) { + break; } - int temp = traverseRings(neighbour, currentAtom, depth+1); - if( temp <= depth) { - result = Math.min(result, temp); + Atom nextAtom = neighbours.get(0); + if (nextAtom.getProperty(Atom.VISITED) != null) { + //chain reached a previously visited atom, must be a ring + break; } + previousAtom = currentAtom; + currentAtom = nextAtom; + equivalentAtoms.add(currentAtom); + currentAtom.setProperty(Atom.VISITED, ++depth); + } + + int result = depth + 1; + for (Atom neighbour : neighbours) { + int temp = traverseRings(neighbour, currentAtom, depth + 1); + result = Math.min(result, temp); } - if( result <= depth ){ + if (result < depth){ + for (Atom a : equivalentAtoms) { + a.setAtomIsInACycle(true); + } + } else if (result == depth) { currentAtom.setAtomIsInACycle(true); } return 
result; - } private static class PathSearchState{ final Atom currentAtom; - final LinkedList orderAtomsVisited; - public PathSearchState(Atom currentAtom, LinkedList orderAtomsVisited ) { + final List orderAtomsVisited; + public PathSearchState(Atom currentAtom, List orderAtomsVisited ) { this.currentAtom = currentAtom; this.orderAtomsVisited = orderAtomsVisited; } Atom getCurrentAtom() { return currentAtom; } - LinkedList getOrderAtomsVisited() { + List getOrderAtomsVisited() { return orderAtomsVisited; } } @@ -79,11 +101,11 @@ */ static List> getPathBetweenAtomsUsingBonds(Atom a1, Atom a2, Set peripheryBonds){ List> paths = new ArrayList>(); - LinkedList stateStack = new LinkedList(); - stateStack.add(new PathSearchState(a1, new LinkedList())); + Deque stateStack = new ArrayDeque(); + stateStack.add(new PathSearchState(a1, new ArrayList())); while (stateStack.size()>0){ PathSearchState state =stateStack.removeLast();//depth first traversal - LinkedList orderAtomsVisited = state.getOrderAtomsVisited(); + List orderAtomsVisited = state.getOrderAtomsVisited(); Atom nextAtom = state.getCurrentAtom(); orderAtomsVisited.add(nextAtom); Set neighbourBonds = new LinkedHashSet(nextAtom.getBonds()); @@ -97,7 +119,7 @@ paths.add(new ArrayList(orderAtomsVisited.subList(1, orderAtomsVisited.size()))); } else{//add atom to stack, its neighbours will be recursively investigated shortly - stateStack.add(new PathSearchState(neighbour, new LinkedList(orderAtomsVisited))); + stateStack.add(new PathSearchState(neighbour, new ArrayList(orderAtomsVisited))); } } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CyclicAtomList.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CyclicAtomList.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CyclicAtomList.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/CyclicAtomList.java 2017-07-23 20:55:18.000000000 +0000 @@ -4,18 +4,18 
@@ /** * Convenience class for iterating over a list of atoms that form a ring - * Doing getNext when the indice is the final atom in the list will return the first atom - * Doing getPrevious when the indice is the first atom in the list will return the final atom + * Doing getNext when the index is the final atom in the list will return the first atom + * Doing getPrevious when the index is the first atom in the list will return the final atom * @author dl387 * */ class CyclicAtomList{ - private int indice = -1; + private int index = -1; private final List atomList; /** * Construct a cyclicAtomList from an atomList - * Indice defaults to -1 + * Index defaults to -1 * @param atomList */ CyclicAtomList(List atomList) { @@ -24,14 +24,24 @@ /** * Construct a cyclicAtomList from an atomList - * The second parameter sets the current indice + * The second parameter sets the current index * @param atomList - * @param indice - * @throws StructureBuildingException + * @param index */ - CyclicAtomList(List atomList, int indice) throws StructureBuildingException { + CyclicAtomList(List atomList, int index) { this.atomList = atomList; - setIndice(indice); + setIndex(index); + } + + /** + * Returns the number of elements in this list. If this list contains more + * than Integer.MAX_VALUE elements, returns + * Integer.MAX_VALUE. 
+ * + * @return the number of elements in this list + */ + int size() { + return atomList.size(); } /** @@ -45,59 +55,86 @@ } /** - * Return the current indice in the list + * Return the current index in the list * @return */ - int getIndice() { - return indice; + int getIndex() { + return index; } /** - * Set the current indice - * @param indice - * @throws StructureBuildingException + * Set the current index + * @param index */ - void setIndice(int indice) throws StructureBuildingException{ - if (indice >= atomList.size()){ - throw new StructureBuildingException("Specified indice is not within ringAtom list"); + void setIndex(int index) { + if (index >= atomList.size()){ + throw new IllegalArgumentException("Specified index is not within ringAtom list"); } - this.indice =indice; + this.index = index; } /** + * Increments and returns the atom at the new index in the list (next atom) + * When the index is the final atom in the list will return the first atom + * @return + */ + Atom next() { + int tempIndex = index + 1; + if (tempIndex >= atomList.size()){ + tempIndex = 0; + } + index = tempIndex; + return atomList.get(index); + } + + /** + * Decrements and returns the atom at the new index in the list (previous atom) + * when the index is the first atom in the list will return the final atom + * @return + */ + Atom previous() { + int tempIndex = index - 1; + if (tempIndex < 0){ + tempIndex = atomList.size() -1 ; + } + index = tempIndex; + return atomList.get(index); + } + + /** * Returns the next atom in the list - * When the indice is the final atom in the list will return the first atom + * When the index is the final atom in the list will return the first atom + * Doesn't effect the list * @return */ - Atom getNext() { - int tempIndice = indice + 1; - if (tempIndice >= atomList.size()){ - tempIndice=0; + Atom peekNext() { + int tempIndex = index + 1; + if (tempIndex >= atomList.size()){ + tempIndex = 0; } - indice =tempIndice; - return atomList.get(indice); + 
return atomList.get(tempIndex); } /** * Returns the previous atom in the list - * when the indice is the first atom in the list will return the final atom + * when the index is the first atom in the list will return the final atom + * Doesn't effect the list * @return */ - Atom getPrevious() { - int tempIndice = indice - 1; - if (tempIndice < 0){ - tempIndice = atomList.size() -1 ; + Atom peekPrevious() { + int tempIndex = index - 1; + if (tempIndex < 0){ + tempIndex = atomList.size() -1 ; } - indice =tempIndice; - return atomList.get(indice); + return atomList.get(tempIndex); } /** - * Returns the atom corresponding to the current indice - * Note that CycliAtomLists have a default indice of -1 + * Returns the atom corresponding to the current index + * Note that CycliAtomLists have a default index of -1 * @return */ Atom getCurrent() { - return atomList.get(indice); + return atomList.get(index); } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Element.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Element.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Element.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Element.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,229 @@ +package uk.ac.cam.ch.wwmm.opsin; + +import java.util.ArrayList; +import java.util.List; + +abstract class Element { + + protected String name; + protected Element parent = null; + protected final List attributes = new ArrayList(); + + Element(String name) { + this.name = name; + } + + void addAttribute(Attribute attribute) { + attributes.add(attribute); + } + + void addAttribute(String atrName, String atrValue) { + attributes.add(new Attribute(atrName, atrValue)); + } + + /** + * Adds a child element + * @param child + */ + abstract void addChild(Element child); + + /** + * Creates a deep copy with no parent + */ + abstract Element copy(); + + void detach() { + if (parent 
!= null) { + parent.removeChild(this); + } + } + + Attribute getAttribute(int index) { + return attributes.get(index); + } + + /** + * Returns the attribute with the given name + * or null if the attribute doesn't exist + * @param name + * @return + */ + Attribute getAttribute(String name) { + for (int i = 0, len = attributes.size(); i < len; i++) { + Attribute a = attributes.get(i); + if (a.getName().equals(name)) { + return a; + } + } + return null; + } + + int getAttributeCount() { + return attributes.size(); + } + + /** + * Returns the value of the attribute with the given name + * or null if the attribute doesn't exist + * @param name + * @return + */ + String getAttributeValue(String name) { + Attribute attribute = getAttribute(name); + if (attribute != null) { + return attribute.getValue(); + } + return null; + } + + /** + * Returns the child at the given index in the children list + * @param index + * @return + */ + abstract Element getChild(int index); + + /** + * Returns the number of children + * @return + */ + abstract int getChildCount(); + + /** + * Returns a copy of the child elements + * + * @return + */ + abstract List getChildElements(); + + /** + * Gets child elements with this name (in iteration order) + * @param name + * @return + */ + abstract List getChildElements(String name); + + /** + * Returns the first child element with the specified name + * + * @param name + * @return + */ + abstract Element getFirstChildElement(String name); + + /** + * Returns the fragment associated with this element (only applicable to tokens) + * @return + */ + Fragment getFrag() { + throw new UnsupportedOperationException("Only tokens can have associated fragments"); + } + + String getName() { + return name; + } + + Element getParent() { + return this.parent; + } + + abstract String getValue(); + + /** + * Returns the index of the given child in the children list (or -1 if it isn't a child) + * @param child + * @return + */ + abstract int indexOf(Element child); 
+ + /** + * Inserts the element at the given index in the children list + * @param child + * @param index + */ + abstract void insertChild(Element child, int index); + + boolean removeAttribute(Attribute attribute) { + return attributes.remove(attribute); + } + + /** + * Removes the given child element + * @param child + * @return + */ + abstract boolean removeChild(Element child); + + /** + * Removes the element at the given index in the children list + * @param index + * @return + */ + abstract Element removeChild(int index); + + /** + * Replaces a child element with another element + * @param oldChild + * @param newChild + */ + abstract void replaceChild(Element oldChild, Element newChild); + + /** + * Sets the fragment associated with this element (only applicable to tokens!) + * @param frag + */ + void setFrag(Fragment frag) { + throw new UnsupportedOperationException("Only tokens can have associated fragments"); + } + + void setName(String name) { + this.name = name; + } + + void setParent(Element newParentEl) { + this.parent = newParentEl; + } + + abstract void setValue(String text); + + public String toString() { + return toXML(); + } + + String toXML() { + return toXML(0).toString(); + } + + private StringBuilder toXML(int indent) { + StringBuilder result = new StringBuilder(); + for (int i = 0; i < indent; i++) { + result.append(" "); + } + result.append('<'); + result.append(name); + for (Attribute atr : attributes) { + result.append(' '); + result.append(atr.toXML()); + } + result.append('>'); + if (getChildCount() > 0){ + for (Element child : getChildElements()) { + result.append(OpsinTools.NEWLINE); + result.append(child.toXML(indent + 1)); + } + result.append(OpsinTools.NEWLINE); + for (int i = 0; i < indent; i++) { + result.append(" "); + } + } + else{ + result.append(getValue()); + } + result.append("'); + + return result; + } + +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Fragment.java 
opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Fragment.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Fragment.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Fragment.java 2017-07-23 20:55:18.000000000 +0000 @@ -7,13 +7,11 @@ import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; -import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.regex.Matcher; -import nu.xom.Attribute; -import nu.xom.Element; import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; @@ -26,100 +24,61 @@ class Fragment { /**A mapping between IDs and the atoms in this fragment, by default is ordered by the order atoms are added to the fragment*/ - private final LinkedHashMap atomMapFromId = new LinkedHashMap(); + private final Map atomMapFromId = new LinkedHashMap(); /**Equivalent to and synced to atomMapFromId.values() */ private final Collection atomCollection = atomMapFromId.values(); /**A mapping between locants and the atoms in this fragment*/ - private final HashMap atomMapFromLocant = new HashMap(); + private final Map atomMapFromLocant = new HashMap(); /**The bonds in the fragment*/ private final Set bondSet = new LinkedHashSet(); - /**The type of the fragment, for the purpose of resolving suffixes*/ - private String type = ""; - - /**The subType of the fragment, for the purpose of resolving suffixes*/ - private String subType = ""; + /**The associated token element*/ + private Element tokenEl; /**The atoms that are used when this fragment is connected to another fragment. 
Unused outAtoms means that the fragment is a radical or an error has occurred * Initially empty */ - private final LinkedList outAtoms = new LinkedList(); + private final List outAtoms = new ArrayList(); /**The atoms that are used on this fragment to form things like esters * Initially empty */ - private final LinkedList functionalAtoms = new LinkedList(); + private final List functionalAtoms = new ArrayList(); - /**The atom that fragments connecting to this fragment connect to if a locant has not been specified - * Defaults to the first atom to be added to the fragment. This is typically the one with locant 1 - * but allows for fragments with no locants. Can be overridden*/ + /**The atom that fragments connecting to this fragment should connect to in preference + * e.g. for amino acids the alpha amino group + * Null by default*/ private Atom defaultInAtom = null; /**The atoms in the fragment that have been indicated to have hydrogen at the SMILES level.*/ private final List indicatedHydrogen = new ArrayList(); + + /**Pseudo atoms indicating start and end of polymer structure repeat unit*/ + private List polymerAttachmentPoints = null; - /**DO NOT CALL DIRECTLY EXCEPT FOR TESTING - * Makes an empty Fragment with a given type and subType. 
- * @param type The type of the fragment - * @param subType The subtype of the fragment - * @throws StructureBuildingException + /** + * DO NOT CALL DIRECTLY EXCEPT FOR TESTING + * Makes an empty Fragment associated with the given tokenEl + * @param tokenEl + */ + Fragment(Element tokenEl) { + this.tokenEl = tokenEl; + } + + /** + * DO NOT CALL DIRECTLY EXCEPT FOR TESTING + * Makes an empty Fragment with the given type + * + * @param type */ - Fragment(String type, String subType) throws StructureBuildingException { - if (type==null){ - throw new StructureBuildingException("Type specified for fragment is null"); - } - if (subType==null){ - throw new StructureBuildingException("subType specified for fragment is null"); - } - this.type = type; - this.subType = subType; - } - - /**DO NOT CALL DIRECTLY EXCEPT FOR TESTING - * Makes an empty fragment with no specified type.*/ - Fragment() {} - - /**Produces a CML element, corresponding to the molecule. The cml element contains - * a molecule, which contains an atomArray and bondArray filled with atoms and bonds. - * The molecule element has a dummy id of m1. - * @param chemicalName - * - * @return The CML element. 
- * @see Atom - * @see Bond - */ - Element toCMLMolecule(String chemicalName) { - Element cml = new Element("cml", CML_NAMESPACE); - cml.addAttribute(new Attribute("convention","conventions:molecular")); - cml.addNamespaceDeclaration("conventions", "http://www.xml-cml.org/convention/"); - cml.addNamespaceDeclaration("cmlDict", "http://www.xml-cml.org/dictionary/cml/"); - cml.addNamespaceDeclaration("nameDict", "http://www.xml-cml.org/dictionary/cml/name/"); - Element molecule = new Element("molecule", CML_NAMESPACE); - Element name = new Element("name", CML_NAMESPACE); - name.appendChild(chemicalName); - name.addAttribute(new Attribute("dictRef","nameDict:unknown")); - molecule.appendChild(name); - molecule.addAttribute(new Attribute("id", "m1")); - Element atomArray = new Element("atomArray", CML_NAMESPACE); - for(Atom atom : atomCollection) { - atomArray.appendChild(atom.toCMLAtom()); - } - Element bondArray = new Element("bondArray", CML_NAMESPACE); - for(Bond bond : bondSet) { - bondArray.appendChild(bond.toCMLBond()); - } - molecule.appendChild(atomArray); - molecule.appendChild(bondArray); - cml.appendChild(molecule); - return cml; + Fragment(String type) { + this.tokenEl = new TokenEl(""); + this.tokenEl.addAttribute(TYPE_ATR, type); } /**Adds an atom to the fragment and associates it with this fragment*/ void addAtom(Atom atom) { - if (defaultInAtom == null){//the first atom added becomes the defaultInAtom - defaultInAtom = atom; - } List locants =atom.getLocants(); for (String locant: locants) { atomMapFromLocant.put(locant, atom); @@ -127,8 +86,19 @@ atomMapFromId.put(atom.getID(), atom); atom.setFrag(this); } + + /** + * Return the number of atoms in the fragment + * @return + */ + int getAtomCount() { + return atomCollection.size(); + } - /**Gets atomList.*/ + /** + * Returns a copy of the fragment's atoms + * @return + */ List getAtomList() { return new ArrayList(atomCollection); } @@ -157,9 +127,8 @@ * * @param locant The locant to look for * @return 
The id of the found atom, or 0 if it is not found - * @throws StructureBuildingException */ - int getIDFromLocant(String locant) throws StructureBuildingException { + int getIDFromLocant(String locant) { Atom a = getAtomByLocant(locant); if (a != null){ return a.getID(); @@ -185,9 +154,8 @@ * * @param locant The locant to look for * @return The found atom, or null if it is not found - * @throws StructureBuildingException */ - Atom getAtomByLocant(String locant) throws StructureBuildingException { + Atom getAtomByLocant(String locant) { Atom a =atomMapFromLocant.get(locant); if (a != null){ return a; @@ -251,7 +219,7 @@ */ Bond findBond(int ID1, int ID2) { Atom a = atomMapFromId.get(ID1); - if (a!=null){ + if (a != null){ for (Bond b : a.getBonds()) { if((b.getFrom() == ID1 && b.getTo() == ID2) || (b.getTo() == ID1 && b.getFrom() == ID2)) { @@ -280,33 +248,59 @@ /**Works out how many atoms there are in the fragment there are * with consecutive locants, starting from 1 that are in a chain * - * @return The number of atoms in the locant chain. - * @throws StructureBuildingException + * @return The number of atoms in the locant chain */ - int getChainLength() throws StructureBuildingException { + int getChainLength() { int length = 0; - Atom next = getAtomByLocant(Integer.toString(length+1)); + Atom next = getAtomByLocant(Integer.toString(length + 1)); Atom previous = null; - while (next !=null){ - if (previous !=null && previous.getBondToAtom(next) == null){ + while (next != null){ + if (previous != null && previous.getBondToAtom(next) == null){ break; } length++; previous = next; - next = getAtomByLocant(Integer.toString(length+1)); + next = getAtomByLocant(Integer.toString(length + 1)); } return length; } - /**Gets type.*/ + /** + * Gets the type of the corresponding tokenEl + * Returns "" if undefined + * @return + */ String getType() { - return type; + String type = tokenEl.getAttributeValue(TYPE_ATR); + return type != null ? type : ""; } - /**Gets subType. 
- * @return subType*/ + /** + * Gets the subType of the corresponding tokenEl + * Returns "" if undefined + * @return + */ String getSubType() { - return subType; + String subType = tokenEl.getAttributeValue(SUBTYPE_ATR); + return subType != null ? subType : ""; + } + + /** + * Gets the associate tokenEl + * Whether or not this is a real token can be tested by whether it has a parent + * @return + */ + Element getTokenEl() { + return tokenEl; + } + + /** + * Sets the associated tokenEl + * Type/subType are inherited from the tokenEl + * @param tokenEl + */ + void setTokenEl(Element tokenEl) { + this.tokenEl = tokenEl; } /** @@ -350,7 +344,7 @@ /** * Includes the OutAtoms of a given fragment into this fragment * Note that no OutAtoms are created in doing this - * @param outAtoms + * @param frag */ void incorporateOutAtoms(Fragment frag) { outAtoms.addAll(frag.outAtoms); @@ -403,7 +397,7 @@ /** * Includes the FunctionalAtoms of a given fragment into this fragment * Note that no FunctionalAtoms are created in doing this - * @param functionalAtoms + * @param frag */ void incorporateFunctionalAtoms(Fragment frag) { functionalAtoms.addAll(frag.functionalAtoms); @@ -425,30 +419,30 @@ void removeFunctionalAtom(FunctionalAtom functionalAtom) { functionalAtoms.remove(functionalAtom); } + + List getPolymerAttachmentPoints() { + return polymerAttachmentPoints; + } + + void setPolymerAttachmentPoints(List polymerAttachmentPoints) { + this.polymerAttachmentPoints = polymerAttachmentPoints; + } /**Gets a list of atoms in the fragment that connect to a specified atom * * @param atom The reference atom * @return The list of atoms connected to the atom - * @throws StructureBuildingException */ - List getIntraFragmentAtomNeighbours(Atom atom) throws StructureBuildingException { - List results = new ArrayList(); - for(Bond b : atom.getBonds()) { - //recalled atoms will be null if they are not part of this fragment - if(b.getFromAtom() == atom) { - Atom a =getAtomByID(b.getTo()); - if 
(a!=null){ - results.add(a); - } - } else if(b.getToAtom() == atom) { - Atom a =getAtomByID(b.getFrom()); - if (a!=null){ - results.add(a); - } - } - else{ - throw new StructureBuildingException("A bond associated with an atom does not involve it"); + List getIntraFragmentAtomNeighbours(Atom atom) { + List results = new ArrayList(atom.getBondCount()); + for(Bond b : atom.getBonds()) { + Atom otherAtom = b.getOtherAtom(atom); + if (otherAtom == null) { + throw new RuntimeException("OPSIN Bug: A bond associated with an atom does not involve it"); + } + //If the other atom is in atomMapFromId then it is in this fragment + if (atomMapFromId.get(otherAtom.getID()) != null) { + results.add(otherAtom); } } return results; @@ -470,12 +464,12 @@ //recalled atoms will be null if they are not part of this fragment if(b.getFromAtom() == atom) { Atom a =getAtomByID(b.getTo()); - if (a!=null && !a.getType().equals(SUFFIX_TYPE_VAL)){ + if (a != null && !a.getType().equals(SUFFIX_TYPE_VAL)){ v += b.getOrder(); } } else if(b.getToAtom() == atom) { Atom a =getAtomByID(b.getFrom()); - if (a!=null && !a.getType().equals(SUFFIX_TYPE_VAL)){ + if (a != null && !a.getType().equals(SUFFIX_TYPE_VAL)){ v += b.getOrder(); } } @@ -509,7 +503,7 @@ atomMapFromLocant.remove(l); } if (defaultInAtom == atom){ - defaultInAtom = getFirstAtom(); + defaultInAtom = null; } } /** @@ -523,29 +517,17 @@ } return charge; } - - /** - * Sets the type of the fragment e.g. aromaticStem - * @param type - */ - void setType(String type) { - this.type = type; - } - /** - * Sets the subType of the fragment - * @param subType - */ - void setSubType(String subType) { - this.subType = subType; - } - Atom getDefaultInAtom() { return defaultInAtom; } void setDefaultInAtom(Atom inAtom) { - this.defaultInAtom=inAtom; + this.defaultInAtom = inAtom; + } + + Atom getDefaultInAtomOrFirstAtom() { + return defaultInAtom != null ? 
defaultInAtom : getFirstAtom(); } /** @@ -569,10 +551,9 @@ * Checks to see whether a locant is present on this fragment * @param locant * @return - * @throws StructureBuildingException */ - boolean hasLocant(String locant) throws StructureBuildingException { - return getAtomByLocant(locant)!=null; + boolean hasLocant(String locant) { + return getAtomByLocant(locant) != null; } @@ -592,108 +573,6 @@ indicatedHydrogen.add(atom); } - - Atom getAtomOrNextSuitableAtomOrThrow(Atom startingAtom, int additionalValencyRequired, boolean takeIntoAccountOutValency) throws StructureBuildingException { - Atom a =getAtomOrNextSuitableAtom(startingAtom, additionalValencyRequired, takeIntoAccountOutValency); - if (a==null){ - throw new StructureBuildingException("No suitable atom found"); - } - return a; - } - - /** - * Takes an id and additional valency required. Returns the atom associated with that id if adding the specified valency will not violate - * that atom type's maximum valency. - * If this is not possible it iterates sequentially through all atoms in the fragment till one is found - * Spare valency is initially taken into account so that the atom is not dearomatised - * If this is impossible to accomplish dearomatisation is done - * If an atom is still not found an exception is thrown - * atoms belonging to suffixes are never selected unless the original id specified was a suffix atom - * @param startingAtom - * @param additionalValencyRequired The increase in valency that will be required on the desired atom - * @param takeIntoAccountOutValency - * @return Atom - */ - Atom getAtomOrNextSuitableAtom(Atom startingAtom, int additionalValencyRequired, boolean takeIntoAccountOutValency) { - List atomList =getAtomList(); - Atom currentAtom = startingAtom; - int atomCounter=0; - int atomListPosition=atomList.indexOf(currentAtom); - int startingIndex =atomListPosition; - - do {//aromaticity preserved and standard valency assumed - atomCounter++; - if (atomListPosition >= 
atomList.size()){ - atomListPosition -=(atomList.size()); - } - currentAtom=atomList.get(atomListPosition); - if (FragmentTools.isCharacteristicAtom(currentAtom)){ - atomListPosition++; - continue; - } - int currentExpectedValency = currentAtom.determineValency(takeIntoAccountOutValency); - if(currentExpectedValency >= (currentAtom.getIncomingValency() + additionalValencyRequired + (currentAtom.hasSpareValency() ? 1 : 0) + (takeIntoAccountOutValency ? currentAtom.getOutValency() : 0))){ - return currentAtom; - } - atomListPosition++; - } - while(atomCounter < atomList.size()); - - atomListPosition =startingIndex; - atomCounter=0; - - do {//aromaticity preserved, standard valency assumed, non functional suffixes substitutable - atomCounter++; - if (atomListPosition >= atomList.size()){ - atomListPosition -=(atomList.size()); - } - currentAtom=atomList.get(atomListPosition); - if (FragmentTools.isFunctionalAtomOrAldehyde(currentAtom)){ - atomListPosition++; - continue; - } - int currentExpectedValency = currentAtom.determineValency(takeIntoAccountOutValency); - if(currentExpectedValency >= (currentAtom.getIncomingValency() + additionalValencyRequired + (currentAtom.hasSpareValency() ? 1 : 0) + (takeIntoAccountOutValency ? currentAtom.getOutValency() : 0))){ - return currentAtom; - } - atomListPosition++; - } - while(atomCounter < atomList.size()); - - atomListPosition =startingIndex; - atomCounter=0; - - do {//aromaticity preserved any suffix substitutable - atomCounter++; - if (atomListPosition >= atomList.size()){ - atomListPosition -=(atomList.size()); - } - currentAtom=atomList.get(atomListPosition); - - if(ValencyChecker.checkValencyAvailableForBond(currentAtom, additionalValencyRequired + (currentAtom.hasSpareValency() ? 1 : 0) + (takeIntoAccountOutValency ? 
currentAtom.getOutValency() : 0))){ - return currentAtom; - } - atomListPosition++; - } - while(atomCounter < atomList.size()); - - atomListPosition =startingIndex; - atomCounter=0; - do {//aromaticity dropped, anything substitutable - atomCounter++; - if (atomListPosition >= atomList.size()){ - atomListPosition -=(atomList.size()); - } - currentAtom=atomList.get(atomListPosition); - if(ValencyChecker.checkValencyAvailableForBond(currentAtom, additionalValencyRequired + (takeIntoAccountOutValency ? currentAtom.getOutValency() : 0))){ - return currentAtom; - } - atomListPosition++; - } - while(atomCounter < atomList.size()); - return null; - } - /** * Returns the id of the first atom in the fragment * @return @@ -722,7 +601,7 @@ * @throws StructureBuildingException */ void reorderAtomCollection(List atomList) throws StructureBuildingException { - if (atomMapFromId.size()!=atomList.size()){ + if (atomMapFromId.size() != atomList.size()){ throw new StructureBuildingException("atom list is not the same size as the number of atoms in the fragment"); } atomMapFromId.clear(); diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentManager.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentManager.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentManager.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentManager.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,18 +1,17 @@ package uk.ac.cam.ch.wwmm.opsin; +import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; + import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; -import nu.xom.Element; - - /** Holds the Fragments during the construction of the molecule, * handles the 
building of new fragments and handles the creation/deletion of atoms/bonds * @@ -21,18 +20,20 @@ * */ class FragmentManager { - /** All of the atom-containing fragments in the molecule */ - private final Set fragPile; - /** All of the inter-fragment bonds */ - private final Set bondPile; + /** A mapping between fragments and inter fragment bonds */ - private final Map> fragToInterFragmentBond; + private final Map> fragToInterFragmentBond = new LinkedHashMap>(); + + /** All of the atom-containing fragments in the molecule */ + private final Set fragments = fragToInterFragmentBond.keySet(); + /** A builder for fragments specified as SMILES */ private final SMILESFragmentBuilder sBuilder; + /** A source of unique integers */ private final IDManager idManager; - /** Sets up a new Fragment mananger, containing no fragments. + /** Sets up a new Fragment manager, containing no fragments. * * @param sBuilder A SMILESFragmentBuilder - dependency injection. * @param idManager An IDManager. @@ -43,44 +44,45 @@ } this.sBuilder = sBuilder; this.idManager = idManager; - fragPile = new LinkedHashSet(); - bondPile = new LinkedHashSet(); - fragToInterFragmentBond = new HashMap>(); } /** Builds a fragment, based on an SMILES string + * The fragment will not correspond to a token * * @param smiles The fragment to build * @return The built fragment * @throws StructureBuildingException */ Fragment buildSMILES(String smiles) throws StructureBuildingException { - return buildSMILES(smiles, "", ""); + return buildSMILES(smiles, "", NONE_LABELS_VAL); } - + /** Builds a fragment, based on an SMILES string - * - * @param smiles The fragment to build - * @param type The fragment type - * @param labelMapping How to label the fragment - * @return The built fragment + * The fragment will not correspond to a token + * + * @param smiles + * @param type + * @param labelMapping + * @return * @throws StructureBuildingException */ Fragment buildSMILES(String smiles, String type, String labelMapping) 
throws StructureBuildingException { - return buildSMILES(smiles, type, "", labelMapping); + Fragment newFrag = sBuilder.build(smiles, type, labelMapping); + addFragment(newFrag); + return newFrag; } /** Builds a fragment, based on an SMILES string - * + * The fragment will correspond to the given tokenEl + * * @param smiles The fragment to build - * @param type The fragment type - * @param subType The fragment subType + * @param tokenEl The corresponding tokenEl * @param labelMapping How to label the fragment * @return The built fragment * @throws StructureBuildingException */ - Fragment buildSMILES(String smiles, String type, String subType, String labelMapping) throws StructureBuildingException { - Fragment newFrag = sBuilder.build(smiles, type, subType, labelMapping, this); + Fragment buildSMILES(String smiles, Element tokenEl, String labelMapping) throws StructureBuildingException { + Fragment newFrag = sBuilder.build(smiles, tokenEl, labelMapping); addFragment(newFrag); return newFrag; } @@ -88,31 +90,42 @@ /**Creates a new fragment, containing all of the atoms and bonds * of all of the other fragments - i.e. the whole molecule. This updates * which fragments the atoms think they are in to the new super fragment - * but does not remove change the contents of the original fragments. + * but does not change the original fragments. * Hence the original fragments remain associated with their atoms * Atoms and Bonds are not copied. 
* * @return The unified fragment - * @throws StructureBuildingException */ - Fragment getUnifiedFragment() throws StructureBuildingException { - Fragment outFrag = new Fragment(); - List fragments = new ArrayList(fragPile); - addFragment(outFrag); - for(Fragment f : fragments) { - incorporateFragment(f, outFrag);//merge all fragments into one + Fragment getUnifiedFragment() { + Fragment uniFrag = new Fragment(""); + for (Entry> entry : fragToInterFragmentBond.entrySet()) { + Fragment f = entry.getKey(); + Set interFragmentBonds = entry.getValue(); + for(Atom atom : f.getAtomList()) { + uniFrag.addAtom(atom); + } + for(Bond bond : f.getBondSet()) { + uniFrag.addBond(bond); + } + uniFrag.incorporateOutAtoms(f); + uniFrag.incorporateFunctionalAtoms(f); + + for (Bond interFragmentBond : interFragmentBonds) { + uniFrag.addBond(interFragmentBond); + } } - return outFrag; + addFragment(uniFrag); + return uniFrag; } /** Incorporates a fragment, usually a suffix, into a parent fragment * This does: * Imports all of the atoms and bonds from another fragment into this one. 
* Also imports outAtoms and functionalAtoms - * Reassigns inter fragment bonds of the parent fragment as either intra fragment bonds - * of the parent fragment or as inter fragment bonds of the parent fragment + * Reassigns inter-fragment bonds of the child fragment as either intra-fragment bonds + * of the parent fragment or as inter-fragment bonds of the parent fragment * - * The original fragment still maintains its original atomList/bondList/interFragmentBondList which is necessary for stereochemistry handling + * The original fragment still maintains its original atomList/bondList * * @param childFrag The fragment to be incorporated * @param parentFrag The parent fragment @@ -128,24 +141,24 @@ parentFrag.incorporateOutAtoms(childFrag); parentFrag.incorporateFunctionalAtoms(childFrag); - for (Bond bond : fragToInterFragmentBond.get(childFrag)) {//reassign inter fragment bonds of child - if (bond.getFromAtom().getFrag() ==parentFrag || bond.getToAtom().getFrag() ==parentFrag){ - if (bond.getFromAtom().getFrag() ==parentFrag && bond.getToAtom().getFrag() ==parentFrag){ - //bond is now enclosed within parentFrag so make it an intra fragment bond - //and remove it from the interfragment list of the parentFrag - parentFrag.addBond(bond); - fragToInterFragmentBond.get(parentFrag).remove(bond); - } - else{ - //bond was an interfragment bond between the childFrag and another frag - //It is now between the parentFrag and another frag - addInterFragmentBond(bond); - } + Set interFragmentBonds = fragToInterFragmentBond.get(childFrag); + if (interFragmentBonds == null){ + throw new StructureBuildingException("Fragment not registered with this FragmentManager!"); + } + for (Bond bond : interFragmentBonds) {//reassign inter-fragment bonds of child + if (bond.getFromAtom().getFrag() == parentFrag && bond.getToAtom().getFrag() == parentFrag){ + //bond is now enclosed within parentFrag so make it an intra-fragment bond + //and remove it from the inter-fragment set of the 
parentFrag + parentFrag.addBond(bond); + fragToInterFragmentBond.get(parentFrag).remove(bond); + } + else{ + //bond was an inter-fragment bond between the childFrag and another frag + //It is now between the parentFrag and another frag + addInterFragmentBond(bond); } } - if (!fragPile.remove(childFrag)){ - throw new StructureBuildingException("Fragment not found in fragPile"); - } + fragToInterFragmentBond.remove(childFrag); } /** Incorporates a fragment, usually a suffix, into a parent fragment, creating a bond between them. @@ -186,12 +199,11 @@ * @throws StructureBuildingException */ Atom getHeteroatom(String smiles) throws StructureBuildingException { - Fragment heteroAtomFrag = sBuilder.build(smiles, this); - List atomList = heteroAtomFrag.getAtomList(); - if (atomList.size()!=1){ + Fragment heteroAtomFrag = sBuilder.build(smiles); + if (heteroAtomFrag.getAtomCount() != 1){ throw new StructureBuildingException("Heteroatom smiles described a fragment with multiple SMILES!"); } - return atomList.get(0); + return heteroAtomFrag.getFirstAtom(); } /** Uses the information given in the given heteroatom to change the atomic symbol @@ -203,7 +215,7 @@ * @throws StructureBuildingException if a charge disagreement occurs */ void replaceAtomWithAtom(Atom a, Atom heteroAtom, boolean assignLocant) throws StructureBuildingException { - String elementSymbol =heteroAtom.getElement(); + ChemEl chemEl =heteroAtom.getElement(); int replacementCharge =heteroAtom.getCharge(); if (replacementCharge!=0){ if (a.getCharge()==0){ @@ -216,14 +228,14 @@ throw new StructureBuildingException("Charge conflict between replacement term and atom to be replaced"); } } - a.setElement(elementSymbol); + a.setElement(chemEl); a.removeElementSymbolLocants(); if (assignLocant){ - String primes =""; - while (a.getFrag().getAtomByLocant(elementSymbol+primes)!=null){//if element symbol already assigned, add a prime and try again - primes+="'"; + String primes = ""; + while 
(a.getFrag().getAtomByLocant(chemEl.toString() + primes) != null){//if element symbol already assigned, add a prime and try again + primes += "'"; } - a.addLocant(elementSymbol +primes); + a.addLocant(chemEl.toString() + primes); } } @@ -233,9 +245,11 @@ * @return The atom, or null if no such atom exists. */ Atom getAtomByID(int id) { - for(Fragment f : fragPile) { + for(Fragment f : fragments) { Atom a = f.getAtomByID(id); - if(a != null) return a; + if(a != null) { + return a; + } } return null; } @@ -248,7 +262,9 @@ */ Atom getAtomByIDOrThrow(int id) throws StructureBuildingException { Atom a = getAtomByID(id); - if(a == null) throw new StructureBuildingException("Couldn't get atom by id"); + if(a == null) { + throw new StructureBuildingException("Couldn't get atom by id"); + } return a; } @@ -257,7 +273,7 @@ * @throws StructureBuildingException */ void convertSpareValenciesToDoubleBonds() throws StructureBuildingException { - for(Fragment f : fragPile) { + for(Fragment f : fragments) { FragmentTools.convertSpareValenciesToDoubleBonds(f); } } @@ -267,55 +283,51 @@ * @throws StructureBuildingException */ void checkValencies() throws StructureBuildingException { - for(Fragment f : fragPile) { + for(Fragment f : fragments) { f.checkValencies(); } } - Set getBondPile() { - return Collections.unmodifiableSet(bondPile); - } - - Set getFragPile() { - return Collections.unmodifiableSet(fragPile); + Set getFragments() { + return Collections.unmodifiableSet(fragments); } /** - * Adds a fragment to the fragPile + * Registers a fragment * @param frag */ private void addFragment(Fragment frag) { - fragPile.add(frag); fragToInterFragmentBond.put(frag, new LinkedHashSet()); } /** - * Removes a fragment from the fragPile and inter fragment bonds associated with it from the bondpile/fragToInterFragmentBond. 
+ * Removes a fragment + * Any inter-fragment bonds of this fragment are removed from the fragments it was connected to * Throws an exception if fragment wasn't present * @param frag * @throws StructureBuildingException */ void removeFragment(Fragment frag) throws StructureBuildingException { - if (!fragPile.remove(frag)){ - throw new StructureBuildingException("Fragment not found in fragPile"); + Set interFragmentBondsInvolvingFragmentSet = fragToInterFragmentBond.get(frag); + if (interFragmentBondsInvolvingFragmentSet == null) { + throw new StructureBuildingException("Fragment not registered with this FragmentManager!"); } - List interFragmentBondsInvolvingFragment = new ArrayList(fragToInterFragmentBond.get(frag)); + List interFragmentBondsInvolvingFragment = new ArrayList(interFragmentBondsInvolvingFragmentSet); for (Bond bond : interFragmentBondsInvolvingFragment) { - if (bond.getFromAtom().getFrag() ==frag){ + if (bond.getFromAtom().getFrag() == frag){ fragToInterFragmentBond.get(bond.getToAtom().getFrag()).remove(bond); } else{ fragToInterFragmentBond.get(bond.getFromAtom().getFrag()).remove(bond); } - bondPile.remove(bond); } fragToInterFragmentBond.remove(frag); } int getOverallCharge() { - int totalCharge=0; - for (Fragment frag : fragPile) { - totalCharge+=frag.getCharge(); + int totalCharge = 0; + for (Fragment frag : fragments) { + totalCharge += frag.getCharge(); } return totalCharge; } @@ -331,20 +343,20 @@ return copyAndRelabelFragment(originalFragment, 0); } - /** * Creates a copy of a fragment by copying data * labels the atoms using new ids from the idManager * @param originalFragment * @param primesToAdd: The minimum number of primes to add to the cloned atoms. More primes will be added if necessary to keep the locants unique e.g. 
N in the presence of N' becomes N'' when this is 1 * @return the clone of the fragment - * @throws StructureBuildingException */ - Fragment copyAndRelabelFragment(Fragment originalFragment, int primesToAdd) throws StructureBuildingException { - Fragment newFragment =new Fragment(originalFragment.getType(), originalFragment.getSubType()); + Fragment copyAndRelabelFragment(Fragment originalFragment, int primesToAdd) { + Element tokenEl = new TokenEl(""); + tokenEl.addAttribute(TYPE_ATR, originalFragment.getType()); + tokenEl.addAttribute(SUBTYPE_ATR, originalFragment.getSubType()); + Fragment newFragment = new Fragment(tokenEl); HashMap oldToNewAtomMap = new HashMap();//maps old Atom to new Atom List atomList =originalFragment.getAtomList(); - Atom defaultInAtom =originalFragment.getDefaultInAtom(); for (Atom atom : atomList) { int id = idManager.getNextID(); ArrayList newLocants = new ArrayList(atom.getLocants()); @@ -397,7 +409,7 @@ } Set oldAmbiguousElementAssignmentAtoms = atom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT); if (oldAmbiguousElementAssignmentAtoms!=null){ - Set newAtoms = new HashSet(); + Set newAtoms = new LinkedHashSet(); for (Atom oldAtom : oldAmbiguousElementAssignmentAtoms) { newAtoms.add(oldToNewAtomMap.get(oldAtom)); } @@ -419,6 +431,22 @@ if (isAnomeric!=null){ oldToNewAtomMap.get(atom).setProperty(Atom.ISANOMERIC, isAnomeric); } + Integer atomClass = atom.getProperty(Atom.ATOM_CLASS); + if (atomClass!=null){ + oldToNewAtomMap.get(atom).setProperty(Atom.ATOM_CLASS, atomClass); + } + String homologyGroup = atom.getProperty(Atom.HOMOLOGY_GROUP); + if (homologyGroup != null) { + oldToNewAtomMap.get(atom).setProperty(Atom.HOMOLOGY_GROUP, homologyGroup); + } + List oldPositionVariationAtoms = atom.getProperty(Atom.POSITION_VARIATION_BOND); + if (oldPositionVariationAtoms != null) { + List newAtoms = new ArrayList(); + for (Atom oldAtom : oldPositionVariationAtoms) { + newAtoms.add(oldToNewAtomMap.get(oldAtom)); + } + 
oldToNewAtomMap.get(atom).setProperty(Atom.POSITION_VARIATION_BOND, newAtoms); + } } for (int i = 0, l = originalFragment.getOutAtomCount(); i < l; i++) { OutAtom outAtom = originalFragment.getOutAtom(i); @@ -431,7 +459,9 @@ FunctionalAtom functionalAtom = originalFragment.getFunctionalAtom(i); newFragment.addFunctionalAtom(oldToNewAtomMap.get(functionalAtom.getAtom())); } - newFragment.setDefaultInAtom(oldToNewAtomMap.get(defaultInAtom)); + if (originalFragment.getDefaultInAtom() != null) { + newFragment.setDefaultInAtom(oldToNewAtomMap.get(originalFragment.getDefaultInAtom())); + } Set bondSet =originalFragment.getBondSet(); for (Bond bond : bondSet) { Bond newBond = createBond(oldToNewAtomMap.get(bond.getFromAtom()), oldToNewAtomMap.get(bond.getToAtom()), bond.getOrder()); @@ -475,19 +505,20 @@ * @throws StructureBuildingException */ Element cloneElement(BuildState state, Element elementToBeCloned, int primesToAdd) throws StructureBuildingException { - Element clone = new Element(elementToBeCloned); - List originalGroups = XOMTools.getDescendantElementsWithTagName(elementToBeCloned, XmlDeclarations.GROUP_EL); - List clonedGroups = XOMTools.getDescendantElementsWithTagName(clone, XmlDeclarations.GROUP_EL); + Element clone = elementToBeCloned.copy(); + List originalGroups = OpsinTools.getDescendantElementsWithTagName(elementToBeCloned, XmlDeclarations.GROUP_EL); + List clonedGroups = OpsinTools.getDescendantElementsWithTagName(clone, XmlDeclarations.GROUP_EL); HashMap oldNewFragmentMapping =new LinkedHashMap(); for (int i = 0; i < originalGroups.size(); i++) { - Fragment originalFragment =state.xmlFragmentMap.get(originalGroups.get(i)); + Fragment originalFragment = originalGroups.get(i).getFrag(); Fragment newFragment = copyAndRelabelFragment(originalFragment, primesToAdd); oldNewFragmentMapping.put(originalFragment, newFragment); - state.xmlFragmentMap.put(clonedGroups.get(i), newFragment); + newFragment.setTokenEl(clonedGroups.get(i)); + 
clonedGroups.get(i).setFrag(newFragment); List originalSuffixes =state.xmlSuffixMap.get(originalGroups.get(i)); List newSuffixFragments =new ArrayList(); for (Fragment suffix : originalSuffixes) { - newSuffixFragments.add(state.fragManager.copyFragment(suffix)); + newSuffixFragments.add(copyFragment(suffix)); } state.xmlSuffixMap.put(clonedGroups.get(i), newSuffixFragments); } @@ -554,44 +585,44 @@ } /** - * Checks if this bond is an inter fragment bond and if it is removes it + * Removes a bond from the inter-fragment bond mappings if it was present * @param bond */ private void removeInterFragmentBondIfPresent(Bond bond) { - if (bondPile.remove(bond)){ - fragToInterFragmentBond.get(bond.getFromAtom().getFrag()).remove(bond); - fragToInterFragmentBond.get(bond.getToAtom().getFrag()).remove(bond); - } + fragToInterFragmentBond.get(bond.getFromAtom().getFrag()).remove(bond); + fragToInterFragmentBond.get(bond.getToAtom().getFrag()).remove(bond); } /** - * Adds a bond to the inter fragment bond list and fragment to inter-fragment bond mappings + * Adds a bond to the fragment to inter-fragment bond mappings * @param bond */ private void addInterFragmentBond(Bond bond) { - bondPile.add(bond); fragToInterFragmentBond.get(bond.getFromAtom().getFrag()).add(bond); fragToInterFragmentBond.get(bond.getToAtom().getFrag()).add(bond); } /** - * Gets a set of the inter fragment bonds a fragment is involved in + * Gets an unmodifiable view of the set of the inter-fragment bonds a fragment is involved in * @param frag * @return set of inter fragment bonds */ Set getInterFragmentBonds(Fragment frag) { - return fragToInterFragmentBond.get(frag); + Set interFragmentBonds = fragToInterFragmentBond.get(frag); + if (interFragmentBonds == null) { + throw new IllegalArgumentException("Fragment not registered with this FragmentManager!"); + } + return Collections.unmodifiableSet(interFragmentBonds); } /** * Create a new Atom of the given element belonging to the given fragment - * @param 
elementSymbol + * @param chemEl * @param frag * @return Atom - * @throws StructureBuildingException */ - Atom createAtom(String elementSymbol, Fragment frag) throws StructureBuildingException { - Atom a = new Atom(idManager.getNextID(), elementSymbol, frag); + Atom createAtom(ChemEl chemEl, Fragment frag) { + Atom a = new Atom(idManager.getNextID(), chemEl, frag); frag.addAtom(a); return a; } @@ -619,11 +650,18 @@ } void removeAtomAndAssociatedBonds(Atom atom){ - ArrayList bondsToBeRemoved=new ArrayList(atom.getBonds()); + List bondsToBeRemoved = new ArrayList(atom.getBonds()); for (Bond bond : bondsToBeRemoved) { removeBond(bond); } atom.getFrag().removeAtom(atom); + Set ambiguousElementAssignment = atom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT); + if (ambiguousElementAssignment != null){ + ambiguousElementAssignment.remove(atom); + if (ambiguousElementAssignment.size() == 1){ + ambiguousElementAssignment.iterator().next().setProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT, null); + } + } } void removeBond(Bond bond){ @@ -632,4 +670,99 @@ bond.getToAtom().removeBond(bond); removeInterFragmentBondIfPresent(bond); } + + /** + * Valency is used to determine the expected number of hydrogen + * Hydrogens are then added to bring the number of connections up to the minimum required to satisfy the atom's valency + * This allows the valency of the atom to be encoded e.g. phopshane-3 hydrogen, phosphorane-5 hydrogen. + * It is also necessary when considering stereochemistry as a hydrogen beats nothing in the CIP rules + * @throws StructureBuildingException + */ + void makeHydrogensExplicit() throws StructureBuildingException { + for (Fragment fragment : fragments) { + if (fragment.getSubType().equals(ELEMENTARYATOM_SUBTYPE_VAL)){//these do not have implicit hydrogen e.g. 
phosphorus is literally just a phosphorus atom + continue; + } + List atomList = fragment.getAtomList(); + for (Atom parentAtom : atomList) { + int explicitHydrogensToAdd = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(parentAtom); + for (int i = 0; i < explicitHydrogensToAdd; i++) { + Atom hydrogen = createAtom(ChemEl.H, fragment); + createBond(parentAtom, hydrogen, 1); + } + if (parentAtom.getAtomParity() != null){ + if (explicitHydrogensToAdd > 1) { + //Cannot have tetrahedral chirality and more than 2 hydrogens + parentAtom.setAtomParity(null);//probably caused by deoxy + } + else { + modifyAtomParityToTakeIntoAccountExplicitHydrogen(parentAtom); + } + } + } + } + } + + private void modifyAtomParityToTakeIntoAccountExplicitHydrogen(Atom atom) throws StructureBuildingException { + AtomParity atomParity = atom.getAtomParity(); + if (!StereoAnalyser.isPossiblyStereogenic(atom)){ + //no longer a stereoCentre e.g. due to unsaturation + atom.setAtomParity(null); + } + else{ + Atom[] atomRefs4 = atomParity.getAtomRefs4(); + Integer positionOfImplicitHydrogen = null; + Integer positionOfDeoxyHydrogen = null; + for (int i = 0; i < atomRefs4.length; i++) { + Atom a = atomRefs4[i]; + if (a.equals(AtomParity.hydrogen)){ + positionOfImplicitHydrogen = i; + } + else if (a.equals(AtomParity.deoxyHydrogen)){ + positionOfDeoxyHydrogen = i; + } + } + if (positionOfImplicitHydrogen != null || positionOfDeoxyHydrogen != null) { + //atom parity was set in SMILES, the dummy hydrogen atom has now been substituted + List neighbours = atom.getAtomNeighbours(); + for (Atom atomRef : atomRefs4) { + neighbours.remove(atomRef); + } + if (neighbours.size() == 0) { + throw new StructureBuildingException("OPSIN Bug: Unable to determine which atom has substituted a hydrogen at stereocentre"); + } + else if (neighbours.size() == 1 && positionOfDeoxyHydrogen != null) { + atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(0); + if (positionOfImplicitHydrogen != null){ + throw new 
StructureBuildingException("OPSIN Bug: Unable to determine which atom has substituted a hydrogen at stereocentre"); + } + } + else if (neighbours.size() == 1 && positionOfImplicitHydrogen != null) { + atomRefs4[positionOfImplicitHydrogen] = neighbours.get(0); + } + else if (neighbours.size() == 2 && positionOfDeoxyHydrogen != null && positionOfImplicitHydrogen != null) { + try{ + List cipOrderedAtoms = new CipSequenceRules(atom).getNeighbouringAtomsInCipOrder(); + //higher priority group replaces the former hydroxy groups (deoxyHydrogen) + if (cipOrderedAtoms.indexOf(neighbours.get(0)) > cipOrderedAtoms.indexOf(neighbours.get(1))) { + atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(0); + atomRefs4[positionOfImplicitHydrogen] = neighbours.get(1); + } + else{ + atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(1); + atomRefs4[positionOfImplicitHydrogen] = neighbours.get(0); + } + } + catch (CipOrderingException e){ + //assume ligands equivalent so it makes no difference which is which + atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(0); + atomRefs4[positionOfImplicitHydrogen] = neighbours.get(1); + } + } + else{ + throw new StructureBuildingException("OPSIN Bug: Unable to determine which atom has substituted a hydrogen at stereocentre"); + } + } + } + } } \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentTools.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentTools.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentTools.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FragmentTools.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,11 +1,12 @@ package uk.ac.cam.ch.wwmm.opsin; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.Deque; import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedList; 
import java.util.List; import java.util.Map; import java.util.Set; @@ -24,36 +25,30 @@ */ class SortAtomsForElementSymbols implements Comparator { - final Map atomToPreviousBondMap; - public SortAtomsForElementSymbols(Map atomToPreviousBondMap) { - this.atomToPreviousBondMap = atomToPreviousBondMap; - } - public int compare(Atom a, Atom b){ - Bond bondA = atomToPreviousBondMap.get(a); - Bond bondB = atomToPreviousBondMap.get(b); - if (bondA.getOrder() > bondB.getOrder()){//lower order bond is preferred + int bondOrderA = a.getProperty(Atom.VISITED); + int bondOrderB = b.getProperty(Atom.VISITED); + if (bondOrderA > bondOrderB) {//lower order bond is preferred return 1; } - if (bondA.getOrder() < bondB.getOrder()){ + if (bondOrderA < bondOrderB) { return -1; } - if (a.getOutValency() > b.getOutValency()){//prefer atoms with outValency + if (a.getOutValency() > b.getOutValency()) {//prefer atoms with outValency return -1; } - if (a.getOutValency() < b.getOutValency()){ + if (a.getOutValency() < b.getOutValency()) { return 1; } int expectedHydrogenA = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(a); int expectedHydrogenB = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(b); - - - if (expectedHydrogenA > expectedHydrogenB){//prefer atoms with more hydrogen + + if (expectedHydrogenA > expectedHydrogenB) {//prefer atoms with more hydrogen return -1; } - if (expectedHydrogenA < expectedHydrogenB){ + if (expectedHydrogenA < expectedHydrogenB) { return 1; } return 0; @@ -69,25 +64,25 @@ class SortAtomsForMainGroupElementSymbols implements Comparator { public int compare(Atom a, Atom b){ - int compare =a.getElement().compareTo(b.getElement()); - if (compare !=0){//only bother comparing properly if elements are the same + int compare = a.getElement().compareTo(b.getElement()); + if (compare != 0) {//only bother comparing properly if elements are the same return compare; } int aExpectedHydrogen = 
StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(a); - int bExpectedHydrogen =StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(b); - if (aExpectedHydrogen >0 && bExpectedHydrogen ==0){//having substitutable hydrogen preferred + int bExpectedHydrogen = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(b); + if (aExpectedHydrogen > 0 && bExpectedHydrogen == 0) {//having substitutable hydrogen preferred return -1; } - if (aExpectedHydrogen ==0 && bExpectedHydrogen >0){ + if (aExpectedHydrogen == 0 && bExpectedHydrogen > 0) { return 1; } List locantsA = a.getLocants(); List locantsB = b.getLocants(); - if (locantsA.size() ==0 && locantsB.size() >0){//having no locants preferred + if (locantsA.size() == 0 && locantsB.size() > 0) {//having no locants preferred return -1; } - if (locantsA.size() >0 && locantsB.size() ==0){ + if (locantsA.size() > 0 && locantsB.size() == 0) { return 1; } return 0; @@ -164,57 +159,55 @@ */ static void assignElementLocants(Fragment suffixableFragment, List suffixFragments) throws StructureBuildingException { - HashMap elementCount =new HashMap();//keeps track of how many times each element has been seen - HashSet atomsToIgnore = new HashSet();//atoms which already have a symbolic locant + Map elementCount = new HashMap();//keeps track of how many times each element has been seen + Set atomsToIgnore = new HashSet();//atoms which already have a symbolic locant - ArrayList allFragments =new ArrayList(suffixFragments); + List allFragments = new ArrayList(suffixFragments); allFragments.add(suffixableFragment); /* * First check whether any element locants have already been assigned, these will take precedence */ for (Fragment fragment : allFragments) { - List atomList =fragment.getAtomList(); + List atomList = fragment.getAtomList(); for (Atom atom : atomList) { - List elementSymbolLocants =atom.getElementSymbolLocants(); + List elementSymbolLocants = atom.getElementSymbolLocants(); for (String locant : 
elementSymbolLocants) { - int primeCount =0; - for(int i=0;i elementToIgnore = new HashSet(); - for (String element : elementCount.keySet()) { - elementToIgnore.add(element); - } - for (Fragment fragment : allFragments) { - List atomList =fragment.getAtomList(); - for (Atom atom : atomList) { - if (elementToIgnore.contains(atom.getElement())){ - atomsToIgnore.add(atom); + { + Set elementsToIgnore = elementCount.keySet(); + + for (Fragment fragment : allFragments) { + List atomList = fragment.getAtomList(); + for (Atom atom : atomList) { + if (elementsToIgnore.contains(atom.getElement().toString())){ + atomsToIgnore.add(atom); + } } } } - - + String fragType = suffixableFragment.getType(); if (fragType.equals(NONCARBOXYLICACID_TYPE_VAL) || fragType.equals(CHALCOGENACIDSTEM_TYPE_VAL)){ - if (suffixFragments.size()!=0){ + if (suffixFragments.size() != 0){ throw new StructureBuildingException("No suffix fragments were expected to be present on non carboxylic acid"); } processNonCarboxylicAcidLabelling(suffixableFragment, elementCount, atomsToIgnore); } else{ - if (suffixFragments.size()>0){ + if (suffixFragments.size() > 0){ processSuffixLabelling(suffixFragments, elementCount, atomsToIgnore); - if (elementCount.get("N")!=null && elementCount.get("N")>1){//look for special case violation of IUPAC rule, =(N)=(NN) is N//N' in practice rather than N/N'/N'' + Integer seenCount = elementCount.get("N"); + if (seenCount != null && seenCount > 1){//look for special case violation of IUPAC rule, =(N)=(NN) is N//N' in practice rather than N/N'/N'' //this method will put both locants on the N with substituable hydrogen detectAndCorrectHydrazoneDerivativeViolation(suffixFragments); } @@ -223,17 +216,16 @@ } } - private static void detectAndCorrectHydrazoneDerivativeViolation(List suffixFragments) { fragmentLoop: for (Fragment suffixFrag : suffixFragments) { List atomList = suffixFrag.getAtomList(); for (Atom atom : atomList) { - if (atom.getElement().equals("N") && 
atom.getIncomingValency() ==3 ){ + if (atom.getElement() == ChemEl.N && atom.getIncomingValency() ==3 ){ List locants =atom.getLocants(); if (locants.size()==1 && MATCH_ELEMENT_SYMBOL_LOCANT.matcher(locants.get(0)).matches()){ List neighbours = atom.getAtomNeighbours(); for (Atom neighbour : neighbours) { - if (neighbour.getElement().equals("N") && neighbour.getIncomingValency()==1){ + if (neighbour.getElement() == ChemEl.N && neighbour.getIncomingValency()==1){ String locantToAdd = locants.get(0); atom.clearLocants(); neighbour.addLocant(locantToAdd); @@ -246,13 +238,9 @@ } } - - private static void processMainGroupLabelling(Fragment suffixableFragment, HashMap elementCount, HashSet atomsToIgnore) { - HashSet elementToIgnore = new HashSet(); - for (String element : elementCount.keySet()) { - elementToIgnore.add(element); - } - List atomList =suffixableFragment.getAtomList(); + private static void processMainGroupLabelling(Fragment suffixableFragment, Map elementCount, Set atomsToIgnore) { + Set elementToIgnore = new HashSet(elementCount.keySet()); + List atomList = suffixableFragment.getAtomList(); Collections.sort(atomList, new SortAtomsForMainGroupElementSymbols()); Atom atomToAddCLabelTo = null;//only add a C label if there is only one C in the main group boolean seenMoreThanOneC = false; @@ -260,117 +248,111 @@ if (atomsToIgnore.contains(atom)){ continue; } - String element =atom.getElement(); - if (elementToIgnore.contains(element)){ + ChemEl chemEl = atom.getElement(); + if (elementToIgnore.contains(chemEl.toString())){ continue; } - if (element.equals("C")){ - if (seenMoreThanOneC){ + if (chemEl == ChemEl.C) { + if (seenMoreThanOneC) { continue; } - if (atomToAddCLabelTo !=null){ + if (atomToAddCLabelTo != null){ atomToAddCLabelTo = null; - seenMoreThanOneC =true; + seenMoreThanOneC = true; } else{ - atomToAddCLabelTo =atom; + atomToAddCLabelTo = atom; } - continue; - } - if (elementCount.get(element)==null){ - atom.addLocant(element); - 
elementCount.put(element,1); } else{ - int count =elementCount.get(element); - atom.addLocant(element + StringTools.multiplyString("'", count)); - elementCount.put(element, count +1); + assignLocant(atom, elementCount); } } - if (atomToAddCLabelTo !=null){ + if (atomToAddCLabelTo != null){ atomToAddCLabelTo.addLocant("C"); } } - - private static void processSuffixLabelling(List suffixFragments, HashMap elementCount, HashSet atomsToIgnore) throws StructureBuildingException { - LinkedList startingAtoms = new LinkedList(); - Map atomPreviousBondMap = new HashMap(); + private static void processSuffixLabelling(List suffixFragments, Map elementCount, Set atomsToIgnore) { + List startingAtoms = new ArrayList(); Set atomsVisited = new HashSet(); for (Fragment fragment : suffixFragments) { - List suffixAtomList =fragment.getAtomList(); - Atom rAtom = suffixAtomList.get(0); - LinkedList nextAtoms = new LinkedList(rAtom.getAtomNeighbours()); - for (Atom nextAtom : nextAtoms) { - atomsVisited.add(nextAtom); - atomPreviousBondMap.put(nextAtom, rAtom.getBondToAtomOrThrow(nextAtom)); - } + Atom rAtom = fragment.getFirstAtom(); + List nextAtoms = getIntraFragmentNeighboursAndSetVisitedBondOrder(rAtom); + atomsVisited.addAll(nextAtoms); startingAtoms.addAll(nextAtoms); } - Collections.sort(startingAtoms, new SortAtomsForElementSymbols(atomPreviousBondMap)); + Collections.sort(startingAtoms, new SortAtomsForElementSymbols()); - while (startingAtoms.size() > 0){ - assignLocantsAndExploreNeighbours(elementCount, atomsToIgnore, atomsVisited, startingAtoms); + Deque atomsToConsider = new ArrayDeque(startingAtoms); + while (atomsToConsider.size() > 0){ + assignLocantsAndExploreNeighbours(elementCount, atomsToIgnore, atomsVisited, atomsToConsider); } } - - private static void processNonCarboxylicAcidLabelling(Fragment suffixableFragment, HashMap elementCount,HashSet atomsToIgnore) throws StructureBuildingException { + private static void processNonCarboxylicAcidLabelling(Fragment 
suffixableFragment, Map elementCount, Set atomsToIgnore) { Set atomsVisited = new HashSet(); - List atomList =suffixableFragment.getAtomList(); - Atom firstAtom = atomList.get(0); - LinkedList nextAtoms = new LinkedList(firstAtom.getAtomNeighbours()); - Map atomPreviousBondMap = new HashMap(); - for (Atom nextAtom : nextAtoms) { - atomPreviousBondMap.put(nextAtom, firstAtom.getBondToAtomOrThrow(nextAtom)); - } - Collections.sort(nextAtoms, new SortAtomsForElementSymbols(atomPreviousBondMap)); + Atom firstAtom = suffixableFragment.getFirstAtom(); + List startingAtoms = getIntraFragmentNeighboursAndSetVisitedBondOrder(firstAtom); + + Collections.sort(startingAtoms, new SortAtomsForElementSymbols()); atomsVisited.add(firstAtom); - while (nextAtoms.size() > 0){ - assignLocantsAndExploreNeighbours(elementCount, atomsToIgnore, atomsVisited, nextAtoms); + Deque atomsToConsider = new ArrayDeque(startingAtoms); + while (atomsToConsider.size() > 0){ + assignLocantsAndExploreNeighbours(elementCount, atomsToIgnore, atomsVisited, atomsToConsider); } - if (!atomsToIgnore.contains(firstAtom) && firstAtom.determineValency(true) > firstAtom.getIncomingValency()){ + if (!atomsToIgnore.contains(firstAtom) && firstAtom.determineValency(true) > firstAtom.getIncomingValency()) { //e.g. 
carbonimidoyl the carbon has locant C assignLocant(firstAtom, elementCount); } } - - private static void assignLocantsAndExploreNeighbours(HashMap elementCount, HashSet atomsToIgnore, Set atomsVisited, LinkedList nextAtoms) throws StructureBuildingException { - Atom atom = nextAtoms.removeFirst(); + private static void assignLocantsAndExploreNeighbours(Map elementCount, Set atomsToIgnore, Set atomsVisited, Deque atomsToConsider) { + Atom atom = atomsToConsider.removeFirst(); atomsVisited.add(atom); - if (!atomsToIgnore.contains(atom)){//assign locant + if (!atomsToIgnore.contains(atom)) {//assign locant assignLocant(atom, elementCount); } - List atomNeighbours = atom.getAtomNeighbours(); - for (int i = atomNeighbours.size() -1; i >=0; i--) { - if (atomsVisited.contains(atomNeighbours.get(i))){ - atomNeighbours.remove(i); - } - } - Map atomPreviousBondMap = new HashMap(); - for (Atom atomNeighbour : atomNeighbours) { - atomPreviousBondMap.put(atomNeighbour, atom.getBondToAtomOrThrow(atomNeighbour)); + List atomsToExplore = getIntraFragmentNeighboursAndSetVisitedBondOrder(atom); + atomsToExplore.removeAll(atomsVisited); + Collections.sort(atomsToExplore, new SortAtomsForElementSymbols()); + for (int i = atomsToExplore.size() - 1; i >= 0; i--) { + atomsToConsider.addFirst(atomsToExplore.get(i)); } - Collections.sort(atomNeighbours, new SortAtomsForElementSymbols(atomPreviousBondMap)); - nextAtoms.addAll(0, atomNeighbours); } - - private static void assignLocant(Atom atom, HashMap elementCount) { - String element =atom.getElement(); - if (elementCount.get(element)==null){ + /** + * Gets the neighbours of an atom that claim to be within the same frag + * The order of bond taken to get to the neighbour is set on the neighbours Atom.VISITED property + * @param atom + * @return + */ + private static List getIntraFragmentNeighboursAndSetVisitedBondOrder(Atom atom) { + List atomsToExplore = new ArrayList(); + List bonds = atom.getBonds(); + for (Bond bond : bonds) { + Atom 
neighbour = bond.getOtherAtom(atom); + if (neighbour.getFrag().equals(atom.getFrag())) { + atomsToExplore.add(neighbour); + neighbour.setProperty(Atom.VISITED, bond.getOrder()); + } + } + return atomsToExplore; + } + + private static void assignLocant(Atom atom, Map elementCount) { + String element = atom.getElement().toString(); + Integer count = elementCount.get(element); + if (count == null){ atom.addLocant(element); - elementCount.put(element,1); + elementCount.put(element, 1); } else{ - int count =elementCount.get(element); atom.addLocant(element + StringTools.multiplyString("'", count)); - elementCount.put(element, count +1); + elementCount.put(element, count + 1); } } - /** Adjusts the order of a bond in a fragment. * * @param fromAtom The lower-numbered atom in the bond @@ -432,51 +414,20 @@ Bond b = fromAtom.getBondToAtomOrThrow(toAtom); b.setOrder(bondOrder); } - - /** Works out where to put an "one", if this is unspecified. position 2 for propanone - * and higher, else 1. Position 2 is assumed to be 1 higher than the atomIndice given. - * - * @param fragment The fragment - * @param atomIndice - * @return the appropriate atom indice - * @throws StructureBuildingException - */ - static int findKetoneAtomIndice(Fragment fragment, int atomIndice) throws StructureBuildingException { - if(fragment.getChainLength() < 3){ - return atomIndice; - } - else { - if (atomIndice +1>=fragment.getAtomList().size()){ - return 1;//this probably indicates a problem with the input name but nonetheless 1 is a better answer than an indice which isn't even in the range of the fragment - } - else{ - return atomIndice +1; - } - } - } - - /**Adjusts the labeling on a fused ring system, such that bridgehead atoms - * have locants endings in 'a' or 'b' etc. 
Example: naphthalene - * 1,2,3,4,5,6,7,8,9,10->1,2,3,4,4a,5,6,7,8,8a - * @param fusedring - */ - static void relabelFusedRingSystem(Fragment fusedring){ - relabelFusedRingSystem(fusedring.getAtomList()); - } /**Adjusts the labeling on a fused ring system, such that bridgehead atoms * have locants endings in 'a' or 'b' etc. Example: naphthalene * 1,2,3,4,5,6,7,8,9,10->1,2,3,4,4a,5,6,7,8,8a * @param atomList */ - static void relabelFusedRingSystem(List atomList) { + static void relabelLocantsAsFusedRingSystem(List atomList) { int locantVal = 0; char locantLetter = 'a'; for (Atom atom : atomList) { atom.clearLocants(); } for (Atom atom : atomList) { - if(!atom.getElement().equals("C") || atom.getBonds().size() < 3) { + if(atom.getElement() != ChemEl.C || atom.getBondCount() < 3) { locantVal++; locantLetter = 'a'; atom.addLocant(Integer.toString(locantVal)); @@ -533,16 +484,15 @@ * Given the starting nitrogen returns the other nitrogen or null if that nitrogen does not appear to be involved in such tautomerism * @param nitrogen * @return null or the other nitrogen - * @throws StructureBuildingException */ - static Atom detectSimpleNitrogenTautomer(Atom nitrogen) throws StructureBuildingException { - if (nitrogen.getElement().equals("N") && nitrogen.getAtomIsInACycle()){ + static Atom detectSimpleNitrogenTautomer(Atom nitrogen) { + if (nitrogen.getElement() == ChemEl.N && nitrogen.getAtomIsInACycle()){ for (Atom neighbour : nitrogen.getAtomNeighbours()) { - if (neighbour.hasSpareValency() && neighbour.getElement().equals("C") && neighbour.getAtomIsInACycle()){ + if (neighbour.hasSpareValency() && neighbour.getElement() == ChemEl.C && neighbour.getAtomIsInACycle()){ List distance2Neighbours = neighbour.getAtomNeighbours(); distance2Neighbours.remove(nitrogen); for (Atom distance2Neighbour : distance2Neighbours) { - if (distance2Neighbour.hasSpareValency() && distance2Neighbour.getElement().equals("N") && distance2Neighbour.getAtomIsInACycle() && 
distance2Neighbour.getCharge()==0){ + if (distance2Neighbour.hasSpareValency() && distance2Neighbour.getElement() == ChemEl.N && distance2Neighbour.getAtomIsInACycle() && distance2Neighbour.getCharge()==0){ return distance2Neighbour; } } @@ -554,28 +504,28 @@ /**Increases the order of bonds joining atoms with spareValencies, * and uses up said spareValencies. + * [spare valency is an indication of the atom's desire to form the maximum number of non-cumulative double bonds] * @param frag * @throws StructureBuildingException If the algorithm can't work out where to put the bonds */ static void convertSpareValenciesToDoubleBonds(Fragment frag) throws StructureBuildingException { - List atomCollection =frag.getAtomList(); + List atomCollection = frag.getAtomList(); /* pick atom, getAtomNeighbours, decideIfTerminal, resolve */ /* - * Correct spare valency by looking at valencyState of atom - * + * Remove spare valency on atoms with valency precluding creation of double bonds */ for(Atom a : atomCollection) { a.ensureSVIsConsistantWithValency(true); } /* - * Remove spare valency on atoms which may not form higher order bonds + * Remove spare valency on atoms that are not adjacent to another atom with spare valency */ atomLoop: for(Atom a : atomCollection) { if(a.hasSpareValency()) { for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) { - if(aa.hasSpareValency()){ + if(aa.hasSpareValency()) { continue atomLoop; } } @@ -584,29 +534,31 @@ } /* - Reduce valency of atoms which cannot possibly have any of their bonds converted to double bonds - pick an atom which definitely does have spare valency to be the indicated hydrogen. + * The indicated hydrogen from the original SMILES definition of the fragment e.g. [nH] are used to disambiguate if there are + * an odd number of atoms with spare valency. 
Hence pyrrole is unambiguously 1H-pyrrole unless specified otherwise + * Things gets more complicated if the input contained multiple indicated hydrogen as it is unclear whether these still apply to the final molecule */ - Atom atomToReduceValencyAt =null; + Atom atomToReduceValencyAt = null; List originalIndicatedHydrogen = frag.getIndicatedHydrogen(); - List indicatedHydrogen = new ArrayList(originalIndicatedHydrogen); - for (int i = indicatedHydrogen.size() -1; i >=0; i--) { - if (!indicatedHydrogen.get(i).hasSpareValency()){ - indicatedHydrogen.remove(i); + List indicatedHydrogen = new ArrayList(originalIndicatedHydrogen.size()); + for (Atom atom : frag.getIndicatedHydrogen()) { + if (atom.hasSpareValency() && atom.getCharge() == 0) { + indicatedHydrogen.add(atom); } } - if (indicatedHydrogen.size()>0){ - if (indicatedHydrogen.size()>1){ + if (indicatedHydrogen.size() > 0) { + //typically there will be only one indicated hydrogen + if (indicatedHydrogen.size() > 1) { for (Atom indicatedAtom : indicatedHydrogen) { boolean couldBeInvolvedInSimpleNitrogenTautomerism = false;//fix for guanine like purine derivatives - if (indicatedAtom.getElement().equals("N") && indicatedAtom.getAtomIsInACycle()){ + if (indicatedAtom.getElement() == ChemEl.N && indicatedAtom.getAtomIsInACycle()) { atomloop : for (Atom neighbour : indicatedAtom.getAtomNeighbours()) { - if (neighbour.getElement().equals("C") && neighbour.getAtomIsInACycle()){ + if (neighbour.getElement() == ChemEl.C && neighbour.getAtomIsInACycle()) { List distance2Neighbours = neighbour.getAtomNeighbours(); distance2Neighbours.remove(indicatedAtom); for (Atom distance2Neighbour : distance2Neighbours) { - if (distance2Neighbour.getElement().equals("N") && distance2Neighbour.getAtomIsInACycle() && !originalIndicatedHydrogen.contains(distance2Neighbour)){ - couldBeInvolvedInSimpleNitrogenTautomerism =true; + if (distance2Neighbour.getElement() == ChemEl.N && distance2Neighbour.getAtomIsInACycle() && 
!originalIndicatedHydrogen.contains(distance2Neighbour)){ + couldBeInvolvedInSimpleNitrogenTautomerism = true; break atomloop; } } @@ -614,7 +566,7 @@ } } //retain spare valency if has the cyclic [NH]C=N moiety but substitution has meant that this tautomerism doesn't actually occur cf. 8-oxoguanine - if (!couldBeInvolvedInSimpleNitrogenTautomerism || detectSimpleNitrogenTautomer(indicatedAtom) != null){ + if (!couldBeInvolvedInSimpleNitrogenTautomerism || detectSimpleNitrogenTautomer(indicatedAtom) != null) { indicatedAtom.setSpareValency(false); } } @@ -629,51 +581,13 @@ svCount += a.hasSpareValency() ? 1 :0; } + /* + * Double-bonds go between pairs of atoms so if there are an off number of candidate atoms (e.g. pyrrole) an atom must be chosen + * The atom with indicated hydrogen (see above) is used in preference else heuristics are used to chose a candidate + */ if((svCount % 2) == 1) { - if (atomToReduceValencyAt ==null){ - for(Atom a : atomCollection) {//try and find an atom with SV that neighbours only one atom with SV - if(a.hasSpareValency()) { - int atomsWithSV =0; - for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) { - if(aa.hasSpareValency()) { - atomsWithSV++; - } - } - if (atomsWithSV==1){ - atomToReduceValencyAt=a; - break; - } - } - } - if (atomToReduceValencyAt==null){ - atomLoop: for(Atom a : atomCollection) {//try and find an atom with bridgehead atoms with SV on both sides c.f. phenoxastibinine ==10H-phenoxastibinine - if(a.hasSpareValency()) { - List neighbours =frag.getIntraFragmentAtomNeighbours(a); - if (neighbours.size()==2){ - for(Atom aa : neighbours) { - if(frag.getIntraFragmentAtomNeighbours(aa).size() < 3){ - continue atomLoop; - } - } - atomToReduceValencyAt=a; - break; - } - } - } - if (atomToReduceValencyAt==null){//Prefer nitrogen to carbon e.g. 
get NHC=C rather than N=CCH - for(Atom a : atomCollection) { - if(a.hasSpareValency()) { - if (atomToReduceValencyAt==null){ - atomToReduceValencyAt=a;//else just go with the first atom with SV encountered - } - if (!a.getElement().equals("C")){ - atomToReduceValencyAt=a; - break; - } - } - } - } - } + if (atomToReduceValencyAt == null) { + atomToReduceValencyAt = findBestAtomToRemoveSpareValencyFrom(frag, atomCollection); } atomToReduceValencyAt.setSpareValency(false); svCount--; @@ -683,6 +597,7 @@ boolean foundTerminalFlag = false; boolean foundNonBridgeHeadFlag = false; boolean foundBridgeHeadFlag = false; + //First handle cases where double bond placement is completely unambiguous i.e. an atom where only one neighbour has spare valency for(Atom a : atomCollection) { if(a.hasSpareValency()) { int count = 0; @@ -705,56 +620,116 @@ } } } - if(!foundTerminalFlag) { - for(Atom a : atomCollection) { - List neighbours =frag.getIntraFragmentAtomNeighbours(a); - if(a.hasSpareValency() && neighbours.size() < 3) { - for(Atom aa : neighbours) { - if(aa.hasSpareValency()) { - foundNonBridgeHeadFlag = true; - a.setSpareValency(false); - aa.setSpareValency(false); - a.getBondToAtomOrThrow(aa).addOrder(1); - svCount -= 2;//Two atoms where one of them is not a bridge head - break; - } + if(foundTerminalFlag) { + continue; + } + + //Find two atoms where one, or both, of them are not bridgeheads + for(Atom a : atomCollection) { + List neighbours = frag.getIntraFragmentAtomNeighbours(a); + if(a.hasSpareValency() && neighbours.size() < 3) { + for(Atom aa : neighbours) { + if(aa.hasSpareValency()) { + foundNonBridgeHeadFlag = true; + a.setSpareValency(false); + aa.setSpareValency(false); + a.getBondToAtomOrThrow(aa).addOrder(1); + svCount -= 2;//Two atoms where one of them is not a bridge head + break; } } - if(foundNonBridgeHeadFlag) break; } - if(!foundNonBridgeHeadFlag){ - for(Atom a : atomCollection) { - List neighbours =frag.getIntraFragmentAtomNeighbours(a); - 
if(a.hasSpareValency()) { - for(Atom aa : neighbours) { - if(aa.hasSpareValency()) { - foundBridgeHeadFlag = true; - a.setSpareValency(false); - aa.setSpareValency(false); - a.getBondToAtomOrThrow(aa).addOrder(1); - svCount -= 2;//Two atoms where both of them are a bridge head e.g. necessary for something like coronene - break; - } - } + if(foundNonBridgeHeadFlag) { + break; + } + } + if(foundNonBridgeHeadFlag) { + continue; + } + + //Find two atoms where both of them are bridgheads + for(Atom a : atomCollection) { + List neighbours = frag.getIntraFragmentAtomNeighbours(a); + if(a.hasSpareValency()) { + for(Atom aa : neighbours) { + if(aa.hasSpareValency()) { + foundBridgeHeadFlag = true; + a.setSpareValency(false); + aa.setSpareValency(false); + a.getBondToAtomOrThrow(aa).addOrder(1); + svCount -= 2;//Two atoms where both of them are a bridge head e.g. necessary for something like coronene + break; } - if(foundBridgeHeadFlag) break; } - if(!foundBridgeHeadFlag){ - throw new StructureBuildingException("Could not assign all higher order bonds."); + } + if(foundBridgeHeadFlag) { + break; + } + } + if(!foundBridgeHeadFlag) { + throw new StructureBuildingException("Failed to assign all double bonds! (Check that indicated hydrogens have been appropriately specified)"); + } + } + } + + private static Atom findBestAtomToRemoveSpareValencyFrom(Fragment frag, List atomCollection) { + for(Atom a : atomCollection) {//try and find an atom with SV that neighbours only one atom with SV + if(a.hasSpareValency()) { + int atomsWithSV = 0; + for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) { + if(aa.hasSpareValency()) { + atomsWithSV++; } } + if (atomsWithSV == 1) { + return a; + } } } + atomLoop: for(Atom a : atomCollection) {//try and find an atom with bridgehead atoms with SV on both sides c.f. 
phenoxastibinine == 10H-phenoxastibinine + if(a.hasSpareValency()) { + List neighbours = frag.getIntraFragmentAtomNeighbours(a); + if (neighbours.size() == 2) { + for(Atom aa : neighbours) { + if(frag.getIntraFragmentAtomNeighbours(aa).size() < 3){ + continue atomLoop; + } + } + return a; + } + } + } + //Prefer nitrogen to carbon e.g. get NHC=C rather than N=CCH + Atom firstAtomWithSpareValency = null; + Atom firstHeteroAtomWithSpareValency = null; + for(Atom a : atomCollection) { + if(a.hasSpareValency()) { + if (a.getElement() != ChemEl.C) { + if (a.getCharge() == 0) { + return a; + } + if(firstHeteroAtomWithSpareValency == null) { + firstHeteroAtomWithSpareValency = a; + } + } + if(firstAtomWithSpareValency == null) { + firstAtomWithSpareValency = a; + } + } + } + if (firstAtomWithSpareValency == null) { + throw new IllegalArgumentException("OPSIN Bug: No atom had spare valency!"); + } + return firstHeteroAtomWithSpareValency != null ? firstHeteroAtomWithSpareValency : firstAtomWithSpareValency; } - static Atom getAtomByAminoAcidStyleLocant(Atom backboneAtom, String elementSymbol, String primes) throws StructureBuildingException { + static Atom getAtomByAminoAcidStyleLocant(Atom backboneAtom, String elementSymbol, String primes) { //Search for appropriate atom by using the same algorithm as is used to assign locants initially - LinkedList nextAtoms = new LinkedList(); - Map atomPreviousBondMap = new HashMap(); + List startingAtoms = new ArrayList(); Set atomsVisited = new HashSet(); - List neighbours = backboneAtom.getAtomNeighbours(); + List neighbours = getIntraFragmentNeighboursAndSetVisitedBondOrder(backboneAtom); mainLoop: for (Atom neighbour : neighbours) { atomsVisited.add(neighbour); if (!neighbour.getType().equals(SUFFIX_TYPE_VAL)){ @@ -764,19 +739,19 @@ } } } - nextAtoms.add(neighbour); - atomPreviousBondMap.put(neighbour, backboneAtom.getBondToAtomOrThrow(neighbour)); + startingAtoms.add(neighbour); } - Collections.sort(nextAtoms, new 
SortAtomsForElementSymbols(atomPreviousBondMap)); - HashMap elementCount =new HashMap();//keeps track of how many times each element has been seen + Collections.sort(startingAtoms, new SortAtomsForElementSymbols()); + Map elementCount = new HashMap();//keeps track of how many times each element has been seen + Deque atomsToConsider = new ArrayDeque(startingAtoms); boolean hydrazoneSpecialCase =false;//look for special case violation of IUPAC rule where the locant of the =N- atom is skipped. This flag is set when =N- is encountered - while (nextAtoms.size() > 0){ - Atom atom = nextAtoms.removeFirst(); + while (atomsToConsider.size() > 0){ + Atom atom = atomsToConsider.removeFirst(); atomsVisited.add(atom); int primesOnPossibleAtom =0; - String element =atom.getElement(); + String element =atom.getElement().toString(); if (elementCount.get(element)==null){ elementCount.put(element,1); } @@ -792,12 +767,10 @@ hydrazoneSpecialCase =false; } - List atomNeighbours = atom.getAtomNeighbours(); + List atomNeighbours = getIntraFragmentNeighboursAndSetVisitedBondOrder(atom); + atomNeighbours.removeAll(atomsVisited); for (int i = atomNeighbours.size() -1; i >=0; i--) { Atom neighbour = atomNeighbours.get(i); - if (atomsVisited.contains(neighbour)){ - atomNeighbours.remove(i); - } if (!neighbour.getType().equals(SUFFIX_TYPE_VAL)){ for (String neighbourLocant : neighbour.getLocants()) { if (MATCH_NUMERIC_LOCANT.matcher(neighbourLocant).matches()){//gone to an inappropriate atom @@ -807,8 +780,8 @@ } } } - if (atom.getElement().equals("N") && atom.getIncomingValency() ==3 && atom.getCharge()==0 - && atomNeighbours.size()==1 && atomNeighbours.get(0).getElement().equals("N")){ + if (atom.getElement() == ChemEl.N && atom.getIncomingValency() ==3 && atom.getCharge()==0 + && atomNeighbours.size()==1 && atomNeighbours.get(0).getElement() == ChemEl.N){ hydrazoneSpecialCase =true; } else{ @@ -818,15 +791,14 @@ } } } - atomPreviousBondMap = new HashMap(); - for (Atom atomNeighbour : 
atomNeighbours) { - atomPreviousBondMap.put(atomNeighbour, atom.getBondToAtomOrThrow(atomNeighbour)); + + Collections.sort(atomNeighbours, new SortAtomsForElementSymbols()); + for (int i = atomNeighbours.size() - 1; i >= 0; i--) { + atomsToConsider.addFirst(atomNeighbours.get(i)); } - Collections.sort(atomNeighbours, new SortAtomsForElementSymbols(atomPreviousBondMap)); - nextAtoms.addAll(0, atomNeighbours); } - if (primes.equals("") && backboneAtom.getElement().equals(elementSymbol)){//maybe it meant the starting atom + if (primes.equals("") && backboneAtom.getElement().toString().equals(elementSymbol)){//maybe it meant the starting atom return backboneAtom; } return null; @@ -837,20 +809,20 @@ * Determines whether the bond between two elements is likely to be covalent * This is crudely determined based on whether the combination of elements fall outside the ionic and * metallic sections of a van Arkel diagram - * @param element1 - * @param element2 + * @param chemEl1 + * @param chemEl2 * @return */ - static boolean isCovalent(String element1, String element2) { - Double atom1Electrongegativity = AtomProperties.elementToPaulingElectronegativity.get(element1); - Double atom2Electrongegativity = AtomProperties.elementToPaulingElectronegativity.get(element2); + static boolean isCovalent(ChemEl chemEl1, ChemEl chemEl2) { + Double atom1Electrongegativity = AtomProperties.getPaulingElectronegativity(chemEl1); + Double atom2Electrongegativity = AtomProperties.getPaulingElectronegativity(chemEl2); if (atom1Electrongegativity!=null && atom2Electrongegativity !=null){ double halfSum = (atom1Electrongegativity + atom2Electrongegativity)/2; double difference = Math.abs(atom1Electrongegativity - atom2Electrongegativity); if (halfSum < 1.6){ return false;//probably metallic } - if (difference < 1.39* halfSum -2.2){ + if (difference < 1.76 * halfSum - 3.03){ return true; } } @@ -858,34 +830,19 @@ } /** - * Is the atom a suffix atom or an aldehyde atom or a chalcogen functional 
atom + * Is the atom a suffix atom/carbon of an aldehyde atom/chalcogen functional atom/hydroxy (or chalcogen equivalent) + * (by special step heterostems are not considered hydroxy e.g. disulfane) * @param atom * @return */ static boolean isCharacteristicAtom(Atom atom) { - if (atom.getType().equals(SUFFIX_TYPE_VAL)){ - return true; - } - if (atom.getProperty(Atom.ISALDEHYDE)!=null && atom.getProperty(Atom.ISALDEHYDE)){//substituting an aldehyde would make it no longer an aldehyde + if (atom.getType().equals(SUFFIX_TYPE_VAL) || + (atom.getElement().isChalcogen() && !HETEROSTEM_SUBTYPE_VAL.equals(atom.getFrag().getSubType()) && + atom.getIncomingValency() == 1 && + atom.getOutValency() == 0 && atom.getCharge() == 0)) { return true; } - - String element =atom.getElement(); - if (element.equals("O")|| element.equals("S") || element.equals("Se") || element.equals("Te")){//potential chalcogen functional atom - boolean isFunctionalAtom =false; - Fragment frag = atom.getFrag(); - for (int i = 0, l = frag.getFunctionalAtomCount(); i < l; i++) { - FunctionalAtom funcAtom = frag.getFunctionalAtom(i); - if (atom.equals(funcAtom.getAtom())){ - isFunctionalAtom =true; - break; - } - } - if (isFunctionalAtom){ - return true; - } - } - return false; + return isFunctionalAtomOrAldehyde(atom); } /** @@ -894,28 +851,31 @@ * @return */ static boolean isFunctionalAtomOrAldehyde(Atom atom) { - if (atom.getProperty(Atom.ISALDEHYDE)!=null && atom.getProperty(Atom.ISALDEHYDE)){//substituting an aldehyde would make it no longer an aldehyde + if (Boolean.TRUE.equals(atom.getProperty(Atom.ISALDEHYDE))){//substituting an aldehyde would make it no longer an aldehyde return true; } - - String element =atom.getElement(); - if (element.equals("O")|| element.equals("S") || element.equals("Se") || element.equals("Te")){//potential chalcogen functional atom - boolean isFunctionalAtom =false; + return isFunctionalAtom(atom); + } + + /** + * Is the atom a chalcogen functional atom + * @param atom + * 
@return + */ + static boolean isFunctionalAtom(Atom atom) { + ChemEl chemEl = atom.getElement(); + if (chemEl.isChalcogen()) {//potential chalcogen functional atom Fragment frag = atom.getFrag(); for (int i = 0, l = frag.getFunctionalAtomCount(); i < l; i++) { - FunctionalAtom funcAtom = frag.getFunctionalAtom(i); - if (atom.equals(funcAtom.getAtom())){ - isFunctionalAtom =true; - break; + if (atom.equals(frag.getFunctionalAtom(i).getAtom())){ + return true; } } - if (isFunctionalAtom){ - return true; - } } return false; } - + + /** * Checks that all atoms in a ring appear to be equivalent * @param ring @@ -924,11 +884,11 @@ static boolean allAtomsInRingAreIdentical(Fragment ring){ List atomList = ring.getAtomList(); Atom firstAtom = atomList.get(0); - String element = firstAtom.getElement(); + ChemEl chemEl = firstAtom.getElement(); int valency = firstAtom.getIncomingValency(); boolean spareValency = firstAtom.hasSpareValency(); for (Atom atom : atomList) { - if (!atom.getElement().equals(element)){ + if (atom.getElement() != chemEl){ return false; } if (atom.getIncomingValency() != valency){ @@ -940,35 +900,6 @@ } return true; } - - - /** - * Removes a terminal atom of a particular element e.g. 
oxygen - * A locant may be specified to indicate what atom is adjacent to the atom to be removed - * Formally the atom is replaced by hydrogen, hence stereochemistry is intentionally preserved - * @param state - * @param fragment - * @param element The symbol of the element - * @param locant A locant or null - * @throws StructureBuildingException - */ - static void removeHydroxyLikeTerminalAtom(BuildState state, Fragment fragment, String element, String locant) throws StructureBuildingException { - List applicableTerminalAtoms; - if (locant!=null){ - Atom adjacentAtom = fragment.getAtomByLocantOrThrow(locant); - applicableTerminalAtoms = findHydroxyLikeTerminalAtoms(adjacentAtom.getAtomNeighbours(), element); - if (applicableTerminalAtoms.isEmpty()){ - throw new StructureBuildingException("Unable to find terminal atom of type: " + element + " at locant "+ locant +" for subtractive nomenclature"); - } - } - else{ - applicableTerminalAtoms = findHydroxyLikeTerminalAtoms(fragment.getAtomList(), element); - if (applicableTerminalAtoms.isEmpty()){ - throw new StructureBuildingException("Unable to find terminal atom of type: " + element + " for subtractive nomenclature"); - } - } - removeTerminalAtom(state, applicableTerminalAtoms.get(0)); - } static void removeTerminalAtom(BuildState state, Atom atomToRemove) { AtomParity atomParity = atomToRemove.getAtomNeighbours().get(0).getAtomParity(); @@ -983,19 +914,65 @@ } state.fragManager.removeAtomAndAssociatedBonds(atomToRemove); } + + /** + * Removes a terminal oxygen from the atom + * An exception is thrown if no suitable oxygen could be found connected to the atom + * Note that [N+][O-] is treated as N=O + * @param state + * @param atom + * @param desiredBondOrder + * @throws StructureBuildingException + */ + static void removeTerminalOxygen(BuildState state, Atom atom, int desiredBondOrder) throws StructureBuildingException { + //TODO prioritise [N+][O-] + List neighbours = atom.getAtomNeighbours(); + for (Atom neighbour 
: neighbours) { + if (neighbour.getElement() == ChemEl.O && neighbour.getBondCount()==1){ + Bond b = atom.getBondToAtomOrThrow(neighbour); + if (b.getOrder()==desiredBondOrder && neighbour.getCharge()==0){ + FragmentTools.removeTerminalAtom(state, neighbour); + if (atom.getLambdaConventionValency()!=null){//corrects valency for phosphin/arsin/stibin + atom.setLambdaConventionValency(atom.getLambdaConventionValency()-desiredBondOrder); + } + if (atom.getMinimumValency()!=null){//corrects valency for phosphin/arsin/stibin + atom.setMinimumValency(atom.getMinimumValency()-desiredBondOrder); + } + return; + } + else if (neighbour.getCharge() ==-1 && b.getOrder()==1 && desiredBondOrder == 2){ + if (atom.getCharge() ==1 && atom.getElement() == ChemEl.N){ + FragmentTools.removeTerminalAtom(state, neighbour); + atom.neutraliseCharge(); + return; + } + } + } + } + if (desiredBondOrder ==2){ + throw new StructureBuildingException("Double bonded oxygen not found at suffix attachment position. Perhaps a suffix has been used inappropriately"); + } + else if (desiredBondOrder ==1){ + throw new StructureBuildingException("Hydroxy oxygen not found at suffix attachment position. 
Perhaps a suffix has been used inappropriately"); + } + else { + throw new StructureBuildingException("Suitable oxygen not found at suffix attachment position Perhaps a suffix has been used inappropriately"); + } + + } /** * Finds terminal atoms of the given element type from the list given * The terminal atoms be single bonded, not radicals and uncharged * @param atoms - * @param element + * @param chemEl * @return */ - static List findHydroxyLikeTerminalAtoms(List atoms, String element) { + static List findHydroxyLikeTerminalAtoms(List atoms, ChemEl chemEl) { List matches =new ArrayList(); for (Atom atom : atoms) { - if (atom.getElement().equals(element) && atom.getIncomingValency()==1 && + if (atom.getElement() == chemEl && atom.getIncomingValency() == 1 && atom.getOutValency() == 0 && atom.getCharge() == 0){ matches.add(atom); } @@ -1015,7 +992,7 @@ if (fromAtom.getAtomIsInACycle() && toAtom.getAtomIsInACycle()){//obviously both must be in rings //attempt to get from the fromAtom to the toAtom in 6 or fewer steps. List visitedAtoms = new ArrayList(); - LinkedList atomsToInvestigate = new LinkedList();//A queue is not used as I need to make sure that only up to depth 6 is investigated + Deque atomsToInvestigate = new ArrayDeque();//A queue is not used as I need to make sure that only up to depth 6 is investigated List neighbours =fromAtom.getAtomNeighbours(); neighbours.remove(toAtom); for (Atom neighbour : neighbours) { @@ -1026,7 +1003,7 @@ if (atomsToInvestigate.isEmpty()){ break; } - LinkedList atomsToInvestigateNext = new LinkedList(); + Deque atomsToInvestigateNext = new ArrayDeque(); while (!atomsToInvestigate.isEmpty()) { Atom currentAtom =atomsToInvestigate.removeFirst(); if (currentAtom == toAtom){ @@ -1049,24 +1026,23 @@ /** * Finds the hydroxy atom of all hydroxy functional groups in a fragment * i.e. 
not in carboxylic acid or oxime - * @param biochemicalFragment + * @param frag * @return * @throws StructureBuildingException */ - static List findHydroxyGroups(Fragment biochemicalFragment) throws StructureBuildingException { + static List findHydroxyGroups(Fragment frag) throws StructureBuildingException { List hydroxyAtoms = new ArrayList(); - List atoms = biochemicalFragment.getAtomList(); + List atoms = frag.getAtomList(); for (Atom atom : atoms) { - if (atom.getElement().equals("O") && atom.getBonds().size()==1 && - atom.getFirstBond().getOrder()==1 && atom.getOutValency() == 0 && atom.getCharge() == 0){ + if (atom.getElement() == ChemEl.O && atom.getIncomingValency() == 1 && atom.getOutValency() == 0 && atom.getCharge() == 0){ Atom adjacentAtom = atom.getAtomNeighbours().get(0); List neighbours = adjacentAtom.getAtomNeighbours(); - if (adjacentAtom.getElement().equals("C")){ + if (adjacentAtom.getElement() == ChemEl.C){ neighbours.remove(atom); - if (neighbours.size() >= 1 && neighbours.get(0).getElement().equals("O") && adjacentAtom.getBondToAtomOrThrow(neighbours.get(0)).getOrder()==2){ + if (neighbours.size() >= 1 && neighbours.get(0).getElement() == ChemEl.O && adjacentAtom.getBondToAtomOrThrow(neighbours.get(0)).getOrder()==2){ continue; } - if (neighbours.size() >= 2 && neighbours.get(1).getElement().equals("O") && adjacentAtom.getBondToAtomOrThrow(neighbours.get(1)).getOrder()==2){ + if (neighbours.size() >= 2 && neighbours.get(1).getElement() == ChemEl.O && adjacentAtom.getBondToAtomOrThrow(neighbours.get(1)).getOrder()==2){ continue; } hydroxyAtoms.add(atom); @@ -1075,4 +1051,152 @@ } return hydroxyAtoms; } + + static List findnAtomsForSubstitution(List atomList, Atom preferredAtom, int numberOfSubstitutionsRequired, int bondOrder, boolean takeIntoAccountOutValency) { + int atomCount = atomList.size(); + int startingIndex = preferredAtom != null ? 
atomList.indexOf(preferredAtom) : 0; + if (startingIndex < 0){ + throw new IllegalArgumentException("OPSIN Bug: preferredAtom should be part of the list of atoms to search through"); + } + CyclicAtomList atoms = new CyclicAtomList(atomList, startingIndex - 1);//next() will retrieve the atom at the startingIndex + List substitutableAtoms = new ArrayList(); + for (int i = 0; i < atomCount; i++) {//aromaticity preserved, standard valency assumed, characteristic atoms ignored + Atom atom = atoms.next(); + if (!FragmentTools.isCharacteristicAtom(atom) || (numberOfSubstitutionsRequired == 1 && atom == preferredAtom)) { + int currentExpectedValency = atom.determineValency(takeIntoAccountOutValency); + int usedValency = atom.getIncomingValency() + (atom.hasSpareValency() ? 1 : 0) + (takeIntoAccountOutValency ? atom.getOutValency() : 0); + int timesAtomCanBeSubstitued = ((currentExpectedValency - usedValency)/ bondOrder); + for (int j = 1; j <= timesAtomCanBeSubstitued; j++) { + substitutableAtoms.add(atom); + } + } + } + if (substitutableAtoms.size() >= numberOfSubstitutionsRequired){ + return substitutableAtoms; + } + substitutableAtoms.clear(); + for (int i = 0; i < atomCount; i++) {//aromaticity preserved, standard valency assumed, functional suffixes ignored + Atom atom = atoms.next(); + if (!FragmentTools.isFunctionalAtomOrAldehyde(atom) || (numberOfSubstitutionsRequired == 1 && atom == preferredAtom)) { + int currentExpectedValency = atom.determineValency(takeIntoAccountOutValency); + int usedValency = atom.getIncomingValency() + (atom.hasSpareValency() ? 1 : 0) + (takeIntoAccountOutValency ? 
atom.getOutValency() : 0); + int timesAtomCanBeSubstitued = ((currentExpectedValency - usedValency)/ bondOrder); + for (int j = 1; j <= timesAtomCanBeSubstitued; j++) { + substitutableAtoms.add(atom); + } + } + } + if (substitutableAtoms.size() >= numberOfSubstitutionsRequired){ + return substitutableAtoms; + } + substitutableAtoms.clear(); + + for (int i = 0; i < atomCount; i++) {//aromaticity preserved, any sensible valency allowed, anything substitutable + Atom atom = atoms.next(); + Integer maximumValency = ValencyChecker.getMaximumValency(atom); + if (maximumValency != null) { + int usedValency = atom.getIncomingValency() + (atom.hasSpareValency() ? 1 : 0) + (takeIntoAccountOutValency ? atom.getOutValency() : 0); + int timesAtomCanBeSubstitued = ((maximumValency - usedValency)/ bondOrder); + for (int j = 1; j <= timesAtomCanBeSubstitued; j++) { + substitutableAtoms.add(atom); + } + } + else{ + for (int j = 0; j < numberOfSubstitutionsRequired; j++) { + substitutableAtoms.add(atom); + } + } + } + if (substitutableAtoms.size() >= numberOfSubstitutionsRequired){ + return substitutableAtoms; + } + substitutableAtoms.clear(); + + for (int i = 0; i < atomCount; i++) {//aromaticity dropped, any sensible valency allowed, anything substitutable + Atom atom = atoms.next(); + Integer maximumValency = ValencyChecker.getMaximumValency(atom); + if (maximumValency != null) { + int usedValency = atom.getIncomingValency() + (takeIntoAccountOutValency ? 
atom.getOutValency() : 0); + int timesAtomCanBeSubstitued = ((maximumValency - usedValency)/ bondOrder); + for (int j = 1; j <= timesAtomCanBeSubstitued; j++) { + substitutableAtoms.add(atom); + } + } + else { + for (int j = 0; j < numberOfSubstitutionsRequired; j++) { + substitutableAtoms.add(atom); + } + } + } + if (substitutableAtoms.size() >= numberOfSubstitutionsRequired){ + return substitutableAtoms; + } + return null; + } + + static List findnAtomsForSubstitution(Fragment frag, Atom preferredAtom, int numberOfSubstitutionsRequired, int bondOrder, boolean takeIntoAccountOutValency) { + return findnAtomsForSubstitution(frag.getAtomList(), preferredAtom, numberOfSubstitutionsRequired, bondOrder, takeIntoAccountOutValency); + } + + /** + * Returns a list of atoms of size >= numberOfSubstitutionsDesired (or null if this not possible) + * An atom must have have sufficient valency to support a substituent requiring a bond of order bondOrder + * If an atom can support multiple substituents it will appear in the list multiple times + * This method iterates over the the fragment atoms attempting to fulfil these requirements with incrementally more lenient constraints: + * aromaticity preserved, standard valency assumed, characteristic atoms ignored + * aromaticity preserved, standard valency assumed, functional suffixes ignored + * aromaticity preserved, any sensible valency allowed, anything substitutable + * aromaticity dropped, any sensible valency allowed, anything substitutable + * + * Iteration starts from the defaultInAtom (if applicable, else the first atom) i.e. 
the defaultInAtom if substitutable will be the first atom in the list + * @param frag + * @param numberOfSubstitutionsRequired + * @param bondOrder + * @return + */ + static List findnAtomsForSubstitution(Fragment frag, int numberOfSubstitutionsRequired, int bondOrder) { + return findnAtomsForSubstitution(frag.getAtomList(), frag.getDefaultInAtom(), numberOfSubstitutionsRequired, bondOrder, true); + } + + /** + * Returns a list of the most preferable atoms for substitution (empty list if none are) + * An atom must have have sufficient valency to support a substituent requiring a bond of order bondOrder + * If an atom can support multiple substituents it will appear in the list multiple times + * This method iterates over the the fragment atoms attempting to fulfil these requirements with incrementally more lenient constraints: + * aromaticity preserved, standard valency assumed, characteristic atoms ignored + * aromaticity preserved, standard valency assumed, functional suffixes ignored + * aromaticity preserved, any sensible valency allowed, anything substitutable + * aromaticity dropped, any sensible valency allowed, anything substitutable + * + * Iteration starts from the defaultInAtom (if applicable, else the first atom) i.e. 
the defaultInAtom if substitutable will be the first atom in the list + * @param frag + * @param bondOrder + * @return + */ + static List findSubstituableAtoms(Fragment frag, int bondOrder) { + List potentialAtoms = findnAtomsForSubstitution(frag, 1, bondOrder); + if (potentialAtoms == null) { + return Collections.emptyList(); + } + return potentialAtoms; + } + + static Atom lastNonSuffixCarbonWithSufficientValency(Fragment conjunctiveFragment) { + List atomList = conjunctiveFragment.getAtomList(); + for (int i = atomList.size()-1; i >=0; i--) { + Atom a = atomList.get(i); + if (a.getType().equals(SUFFIX_TYPE_VAL)){ + continue; + } + if (a.getElement() != ChemEl.C){ + continue; + } + if (ValencyChecker.checkValencyAvailableForBond(a, 1)){ + return a; + } + } + return null; + } + + } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FunctionalReplacement.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FunctionalReplacement.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FunctionalReplacement.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FunctionalReplacement.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,25 +1,19 @@ package uk.ac.cam.ch.wwmm.opsin; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; -import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; -import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Set; import java.util.regex.Pattern; - -import nu.xom.Attribute; -import nu.xom.Element; -import nu.xom.Elements; -import nu.xom.Node; - /** - * Master methods and convenience methods for performing functional replacement + * Methods for performing functional replacement * @author dl387 * */ @@ -33,8 +27,8 @@ */ private static class 
SortInfixTransformations implements Comparator { public int compare(String infixTransformation1, String infixTransformation2) { - int allowedInputs1 = MATCH_COMMA.split(infixTransformation1).length; - int allowedInputs2 = MATCH_COMMA.split(infixTransformation2).length; + int allowedInputs1 = infixTransformation1.split(",").length; + int allowedInputs2 = infixTransformation2.split(",").length; if (allowedInputs1 < allowedInputs2){//infixTransformation1 preferred return -1; } @@ -46,45 +40,53 @@ } } } - private final static Pattern matchChalcogen = Pattern.compile("O|S|Se|Te"); - private static final Pattern matchChalcogenReplacement= Pattern.compile("thio|seleno|telluro"); - - enum PREFIX_REPLACEMENT_TYPE{ + private static enum PREFIX_REPLACEMENT_TYPE{ chalcogen,//ambiguous halideOrPseudoHalide,//only mean functional replacement when applied to non carboxylic acids dedicatedFunctionalReplacementPrefix,//no ambiguity exists hydrazono,//ambiguous, only applies to non carboxylic acid peroxy//ambiguous, also applies to etheric oxygen + } + + private static final Pattern matchChalcogenReplacement= Pattern.compile("thio|seleno|telluro"); + + private final BuildState state; + + FunctionalReplacement(BuildState state) { + this.state = state; } /** * Applies the effects of acid replacing functional class nomenclature * This must be performed early so that prefix/infix functional replacement is performed correctly * and so that element symbol locants are assigned appropriately - * @param state * @param finalSubOrRootInWord * @param word * @throws ComponentGenerationException * @throws StructureBuildingException */ - static void processAcidReplacingFunctionalClassNomenclature(BuildState state, Element finalSubOrRootInWord, Element word) throws ComponentGenerationException, StructureBuildingException { + void processAcidReplacingFunctionalClassNomenclature(Element finalSubOrRootInWord, Element word) throws ComponentGenerationException, StructureBuildingException { Element 
wordRule = OpsinTools.getParentWordRule(word); if (WordRule.valueOf(wordRule.getAttributeValue(WORDRULE_ATR)) == WordRule.acidReplacingFunctionalGroup){ - Element parentWordRule = (Element) word.getParent(); + Element parentWordRule = word.getParent(); if (parentWordRule.indexOf(word)==0){ - List acidReplacingFullWords = XOMTools.getChildElementsWithTagNameAndAttribute(parentWordRule, WORD_EL, TYPE_ATR, WordType.full.toString()); - acidReplacingFullWords.remove(word); - if (acidReplacingFullWords.size()>0){//case where functionalTerm is substituted - //as words are processed from right to left in cases like phosphoric acid tri(ethylamide) this will be phosphoric acid ethylamide ethylamide ethylamide - for (Element acidReplacingWord : acidReplacingFullWords) { - processAcidReplacingFunctionalClassNomenclatureFullWord(state, finalSubOrRootInWord, acidReplacingWord); + for (int i = 1, l = parentWordRule.getChildCount(); i < l ; i++) { + Element acidReplacingWord = parentWordRule.getChild(i); + if (!acidReplacingWord.getName().equals(WORD_EL)) { + throw new RuntimeException("OPSIN bug: problem with acidReplacingFunctionalGroup word rule"); + } + String type = acidReplacingWord.getAttributeValue(TYPE_ATR); + if (type.equals(WordType.full.toString())) { + //case where functionalTerm is substituted + //as words are processed from right to left in cases like phosphoric acid tri(ethylamide) this will be phosphoric acid ethylamide ethylamide ethylamide + processAcidReplacingFunctionalClassNomenclatureFullWord(finalSubOrRootInWord, acidReplacingWord); + } + else if (type.equals(WordType.functionalTerm.toString())) { + processAcidReplacingFunctionalClassNomenclatureFunctionalWord(finalSubOrRootInWord, acidReplacingWord); + } + else { + throw new RuntimeException("OPSIN bug: problem with acidReplacingFunctionalGroup word rule"); } - } - else if (parentWordRule.getChildElements().size()==2) { - processAcidReplacingFunctionalClassNomenclatureFunctionalWord(state, 
finalSubOrRootInWord, ((Element) XOMTools.getNextSibling(word))); - } - else{ - throw new ComponentGenerationException("OPSIN bug: problem with acidReplacingFunctionalGroup word rule"); } } } @@ -101,14 +103,13 @@ * For heterocyclic rings functional replacement should technically be limited to : * pyran, morpholine, chromene, isochromene and xanthene, chromane and isochromane. * but this is not currently enforced - * @param state * @param groups * @param substituents * @return boolean: has any functional replacement occurred * @throws StructureBuildingException * @throws ComponentGenerationException */ - static boolean processPrefixFunctionalReplacementNomenclature(BuildState state, List groups, List substituents) throws StructureBuildingException, ComponentGenerationException { + boolean processPrefixFunctionalReplacementNomenclature(List groups, List substituents) throws StructureBuildingException, ComponentGenerationException { int originalNumberOfGroups = groups.size(); for (int i = originalNumberOfGroups-1; i >=0; i--) { Element group =groups.get(i); @@ -129,38 +130,38 @@ else if (groupValue.equals("peroxy")){ replacementType =PREFIX_REPLACEMENT_TYPE.peroxy; } - if (replacementType!=null){ + if (replacementType != null) { //need to check whether this is an instance of functional replacement by checking the substituent/root it is applying to - Element substituent =(Element) group.getParent(); - Element nextSubOrBracket = (Element) XOMTools.getNextSibling(substituent); - if (nextSubOrBracket!=null && (nextSubOrBracket.getLocalName().equals(ROOT_EL) || nextSubOrBracket.getLocalName().equals(SUBSTITUENT_EL))){ + Element substituent = group.getParent(); + Element nextSubOrBracket = OpsinTools.getNextSibling(substituent); + if (nextSubOrBracket!=null && (nextSubOrBracket.getName().equals(ROOT_EL) || nextSubOrBracket.getName().equals(SUBSTITUENT_EL))){ Element groupToBeModified = nextSubOrBracket.getFirstChildElement(GROUP_EL); - if 
(XOMTools.getPreviousSibling(groupToBeModified)!=null){ + if (groupPrecededByElementThatBlocksPrefixReplacementInterpetation(groupToBeModified)) { if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){ throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue); } continue;//not 2,2'-thiodipyran } - Element locantEl =null;//null unless a locant that agrees with the multiplier is present - Element multiplierEl =null; - int numberOfAtomsToReplace =1;//the number of atoms to be functionally replaced, modified by a multiplier e.g. dithio - Element possibleMultiplier = (Element) XOMTools.getPreviousSibling(group); - if (possibleMultiplier !=null){ + Element locantEl = null;//null unless a locant that agrees with the multiplier is present + Element multiplierEl = null; + int numberOfAtomsToReplace = 1;//the number of atoms to be functionally replaced, modified by a multiplier e.g. dithio + Element possibleMultiplier = OpsinTools.getPreviousSibling(group); + if (possibleMultiplier != null) { Element possibleLocant; - if (possibleMultiplier.getLocalName().equals(MULTIPLIER_EL)){ - numberOfAtomsToReplace =Integer.valueOf(possibleMultiplier.getAttributeValue(VALUE_ATR)); - possibleLocant = (Element) XOMTools.getPreviousSibling(possibleMultiplier); + if (possibleMultiplier.getName().equals(MULTIPLIER_EL)) { + numberOfAtomsToReplace = Integer.valueOf(possibleMultiplier.getAttributeValue(VALUE_ATR)); + possibleLocant = OpsinTools.getPreviousSibling(possibleMultiplier); multiplierEl = possibleMultiplier; } else{ possibleLocant = possibleMultiplier; } - if (possibleLocant !=null && possibleLocant.getLocalName().equals(LOCANT_EL) && possibleLocant.getAttribute(TYPE_ATR)==null) { - int numberOfLocants = MATCH_COMMA.split(possibleLocant.getValue()).length; + if (possibleLocant !=null && possibleLocant.getName().equals(LOCANT_EL) && possibleLocant.getAttribute(TYPE_ATR) == null) { + 
int numberOfLocants = possibleLocant.getValue().split(",").length; if (numberOfLocants == numberOfAtomsToReplace){//locants and number of replacements agree locantEl = possibleLocant; } - else if (numberOfAtomsToReplace >1){//doesn't look like prefix functional replacement + else if (numberOfAtomsToReplace > 1) {//doesn't look like prefix functional replacement if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){ throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue); } @@ -170,45 +171,45 @@ } int oxygenReplaced; - if (replacementType == PREFIX_REPLACEMENT_TYPE.chalcogen){ - oxygenReplaced = performChalcogenFunctionalReplacement(state, groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR)); + if (replacementType == PREFIX_REPLACEMENT_TYPE.chalcogen) { + oxygenReplaced = performChalcogenFunctionalReplacement(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR)); } - else if (replacementType == PREFIX_REPLACEMENT_TYPE.peroxy){ - if (nextSubOrBracket.getLocalName().equals(SUBSTITUENT_EL)){ + else if (replacementType == PREFIX_REPLACEMENT_TYPE.peroxy) { + if (nextSubOrBracket.getName().equals(SUBSTITUENT_EL)) { continue; } - oxygenReplaced = performPeroxyFunctionalReplacement(state, groupToBeModified, locantEl, numberOfAtomsToReplace); + oxygenReplaced = performPeroxyFunctionalReplacement(groupToBeModified, locantEl, numberOfAtomsToReplace); } else if (replacementType == PREFIX_REPLACEMENT_TYPE.dedicatedFunctionalReplacementPrefix){ if (!groupToBeModified.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL) && !(groupToBeModified.getValue().equals("form") && groupValue.equals("imido"))){ throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue); } - oxygenReplaced = performFunctionalReplacementOnAcid(state, 
groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR)); + oxygenReplaced = performFunctionalReplacementOnAcid(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR)); if (oxygenReplaced==0){ throw new ComponentGenerationException("dedicated Functional Replacement Prefix used in an inappropriate position :" + groupValue); } } else if (replacementType == PREFIX_REPLACEMENT_TYPE.hydrazono || replacementType == PREFIX_REPLACEMENT_TYPE.halideOrPseudoHalide){ - Fragment acidFrag = state.xmlFragmentMap.get(groupToBeModified); + Fragment acidFrag = groupToBeModified.getFrag(); if (!groupToBeModified.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL) || - acidHasSufficientHydrogenForSubstitutionInterpretation(acidFrag, state.xmlFragmentMap.get(group).getOutAtom(0).getValency(), locantEl)){ + acidHasSufficientHydrogenForSubstitutionInterpretation(acidFrag, group.getFrag().getOutAtom(0).getValency(), locantEl)){ //hydrazono replacement only applies to non carboxylic acids e.g. 
hydrazonooxalic acid //need to be careful to note that something like chlorophosphonic acid isn't functional replacement continue; } - oxygenReplaced = performFunctionalReplacementOnAcid(state, groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR)); + oxygenReplaced = performFunctionalReplacementOnAcid(groupToBeModified, locantEl, numberOfAtomsToReplace, group.getAttributeValue(VALUE_ATR)); } else{ throw new StructureBuildingException("OPSIN bug: Unexpected prefix replacement type"); } if (oxygenReplaced>0){ - state.fragManager.removeFragment(state.xmlFragmentMap.get(group)); + state.fragManager.removeFragment(group.getFrag()); substituent.removeChild(group); groups.remove(group); - Elements remainingChildren =substituent.getChildElements();//there may be a locant that should be moved + List remainingChildren =substituent.getChildElements();//there may be a locant that should be moved for (int j = remainingChildren.size()-1; j>=0; j--){ - Node child =substituent.getChild(j); + Element child =substituent.getChild(j); child.detach(); nextSubOrBracket.insertChild(child, 0); } @@ -227,6 +228,21 @@ return groups.size() != originalNumberOfGroups; } + /** + * Currently prefix replacement terms must be directly adjacent to the groupToBeModified with an exception made + * for carbohydrate stereochemistry prefixes e.g. 'gluco' and for substractive prefixes e.g. 
'deoxy' + * @param groupToBeModified + * @return + */ + private boolean groupPrecededByElementThatBlocksPrefixReplacementInterpetation(Element groupToBeModified) { + Element previous = OpsinTools.getPreviousSibling(groupToBeModified); + while (previous !=null && (previous.getName().equals(SUBTRACTIVEPREFIX_EL) + || (previous.getName().equals(STEREOCHEMISTRY_EL) && previous.getAttributeValue(TYPE_ATR).equals(CARBOHYDRATECONFIGURATIONPREFIX_TYPE_VAL)))){ + previous = OpsinTools.getPreviousSibling(previous); + } + return previous != null; + } + /* * @@ -234,39 +250,38 @@ /** * Performs functional replacement using infixes e.g. thio in ethanthioic acid replaces an O with S - * @param state * @param suffixFragments May be modified if a multiplier is determined to mean multiplication of a suffix, usually untouched * @param suffixes The suffix elements May be modified if a multiplier is determined to mean multiplication of a suffix, usually untouched * @throws StructureBuildingException * @throws ComponentGenerationException */ - static void processInfixFunctionalReplacementNomenclature(BuildState state, List suffixes, List suffixFragments) throws StructureBuildingException, ComponentGenerationException { + void processInfixFunctionalReplacementNomenclature(List suffixes, List suffixFragments) throws StructureBuildingException, ComponentGenerationException { for (int i = 0; i < suffixes.size(); i++) { Element suffix = suffixes.get(i); - if (suffix.getAttribute(INFIX_ATR)!=null){ - Fragment fragToApplyInfixTo = state.xmlFragmentMap.get(suffix); - Element possibleAcidGroup = XOMTools.getPreviousSiblingIgnoringCertainElements(suffix, new String[]{MULTIPLIER_EL, INFIX_EL, SUFFIX_EL}); - if (possibleAcidGroup !=null && possibleAcidGroup.getLocalName().equals(GROUP_EL) && + if (suffix.getAttribute(INFIX_ATR) != null){ + Fragment fragToApplyInfixTo = suffix.getFrag(); + Element possibleAcidGroup = OpsinTools.getPreviousSiblingIgnoringCertainElements(suffix, new 
String[]{MULTIPLIER_EL, INFIX_EL, SUFFIX_EL}); + if (possibleAcidGroup !=null && possibleAcidGroup.getName().equals(GROUP_EL) && (possibleAcidGroup.getAttributeValue(TYPE_ATR).equals(NONCARBOXYLICACID_TYPE_VAL)|| possibleAcidGroup.getAttributeValue(TYPE_ATR).equals(CHALCOGENACIDSTEM_TYPE_VAL))){ - fragToApplyInfixTo = state.xmlFragmentMap.get(possibleAcidGroup); + fragToApplyInfixTo = possibleAcidGroup.getFrag(); } if (fragToApplyInfixTo ==null){ throw new ComponentGenerationException("infix has erroneously been assigned to a suffix which does not correspond to a suffix fragment. suffix: " + suffix.getValue()); } //e.g. =O:S,-O:S (which indicates replacing either a double or single bonded oxygen with S) //This is semicolon delimited for each infix - List infixTransformations = StringTools.arrayToList(MATCH_SEMICOLON.split(suffix.getAttributeValue(INFIX_ATR))); + List infixTransformations = StringTools.arrayToList(suffix.getAttributeValue(INFIX_ATR).split(";")); List atomList =fragToApplyInfixTo.getAtomList(); - LinkedList singleBondedOxygen =new LinkedList(); - LinkedList doubleBondedOxygen =new LinkedList(); + LinkedList singleBondedOxygen = new LinkedList(); + LinkedList doubleBondedOxygen = new LinkedList(); populateTerminalSingleAndDoubleBondedOxygen(atomList, singleBondedOxygen, doubleBondedOxygen); int oxygenAvailable = singleBondedOxygen.size() +doubleBondedOxygen.size(); /* * Modifies suffixes, suffixFragments, suffix and infixTransformations as appropriate */ - disambiguateMultipliedInfixMeaning(state, suffixes, suffixFragments, suffix, fragToApplyInfixTo, infixTransformations, oxygenAvailable); + disambiguateMultipliedInfixMeaning(suffixes, suffixFragments, suffix, infixTransformations, oxygenAvailable); /* * Sort infixTransformations so more specific transformations are performed first @@ -275,11 +290,11 @@ Collections.sort(infixTransformations, new SortInfixTransformations()); for (String infixTransformation : infixTransformations) { - String[] 
transformationArray = MATCH_COLON.split(infixTransformation); + String[] transformationArray = infixTransformation.split(":"); if (transformationArray.length !=2){ throw new StructureBuildingException("Atom to be replaced and replacement not specified correctly in infix: " + infixTransformation); } - String[] transformations = MATCH_COMMA.split(transformationArray[0]); + String[] transformations = transformationArray[0].split(","); String replacementSMILES = transformationArray[1]; boolean acceptDoubleBondedOxygen = false; boolean acceptSingleBondedOxygen = false; @@ -327,7 +342,7 @@ } } - Set ambiguousElementAtoms = new HashSet(); + Set ambiguousElementAtoms = new LinkedHashSet(); Atom atomToUse = null; if ((acceptDoubleBondedOxygen || nitrido) && doubleBondedOxygen.size()>0 ){ atomToUse = doubleBondedOxygen.removeFirst(); @@ -338,12 +353,12 @@ else{ throw new StructureBuildingException("Cannot find oxygen for infix with SMILES: "+ replacementSMILES+ " to modify!");//this would be a bug } - Fragment replacementFrag =state.fragManager.buildSMILES(replacementSMILES, SUFFIX_TYPE_VAL, NONE_LABELS_VAL); + Fragment replacementFrag = state.fragManager.buildSMILES(replacementSMILES, SUFFIX_TYPE_VAL, NONE_LABELS_VAL); if (replacementFrag.getOutAtomCount()>0){//SMILES include an indication of the bond order the replacement fragment will have, this is not intended to be an outatom replacementFrag.removeOutAtom(0); } Atom atomThatWillReplaceOxygen =replacementFrag.getFirstAtom(); - if (replacementFrag.getAtomList().size()==1 && matchChalcogen.matcher(atomThatWillReplaceOxygen.getElement()).matches()){ + if (replacementFrag.getAtomCount()==1 && atomThatWillReplaceOxygen.getElement().isChalcogen()){ atomThatWillReplaceOxygen.setCharge(atomToUse.getCharge()); atomThatWillReplaceOxygen.setProtonsExplicitlyAddedOrRemoved(atomToUse.getProtonsExplicitlyAddedOrRemoved()); } @@ -391,32 +406,32 @@ /** * Replaces the appropriate number of functional oxygen atoms with the corresponding 
fragment - * @param state * @param acidContainingRoot * @param acidReplacingWord * @throws ComponentGenerationException * @throws StructureBuildingException */ - private static void processAcidReplacingFunctionalClassNomenclatureFullWord(BuildState state, Element acidContainingRoot, Element acidReplacingWord) throws ComponentGenerationException, StructureBuildingException { + private void processAcidReplacingFunctionalClassNomenclatureFullWord(Element acidContainingRoot, Element acidReplacingWord) throws ComponentGenerationException, StructureBuildingException { + String locant = acidReplacingWord.getAttributeValue(LOCANT_ATR); Element acidReplacingGroup = StructureBuildingMethods.findRightMostGroupInBracket(acidReplacingWord); if (acidReplacingGroup ==null){ throw new ComponentGenerationException("OPSIN bug: acid replacing group not found where one was expected for acidReplacingFunctionalGroup wordRule"); } String functionalGroupName = acidReplacingGroup.getValue(); - Fragment acidReplacingFrag = state.xmlFragmentMap.get(acidReplacingGroup); - if (((Element)acidReplacingGroup.getParent()).getChildElements().size()!=1){ + Fragment acidReplacingFrag = acidReplacingGroup.getFrag(); + if (acidReplacingGroup.getParent().getChildCount() != 1){ throw new ComponentGenerationException("Unexpected qualifier to: " + functionalGroupName); } Element groupToBeModified = acidContainingRoot.getFirstChildElement(GROUP_EL); - List oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(state, groupToBeModified); + List oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified); if (oxygenAtoms.size() == 0){ - oxygenAtoms = findFunctionalOxygenAtomsInGroup(state, groupToBeModified); + oxygenAtoms = findFunctionalOxygenAtomsInGroup(groupToBeModified); } if (oxygenAtoms.size() == 0){ - List conjunctiveSuffixElements =XOMTools.getNextSiblingsOfType(groupToBeModified, CONJUNCTIVESUFFIXGROUP_EL); + List conjunctiveSuffixElements 
=OpsinTools.getNextSiblingsOfType(groupToBeModified, CONJUNCTIVESUFFIXGROUP_EL); for (Element conjunctiveSuffixElement : conjunctiveSuffixElements) { - oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(state, conjunctiveSuffixElement)); + oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(conjunctiveSuffixElement)); } } if (oxygenAtoms.size() < 1){ @@ -425,7 +440,7 @@ boolean isAmide = functionalGroupName.equals("amide") || functionalGroupName.equals("amid"); if (isAmide) { - if (acidReplacingFrag.getAtomList().size()!=1){ + if (acidReplacingFrag.getAtomCount()!=1){ throw new ComponentGenerationException("OPSIN bug: " + functionalGroupName + " not found where expected"); } Atom amideNitrogen = acidReplacingFrag.getFirstAtom(); @@ -433,21 +448,20 @@ amideNitrogen.clearLocants(); acidReplacingFrag.addMappingToAtomLocantMap("N", amideNitrogen); } - state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(oxygenAtoms.get(0), acidReplacingFrag.getFirstAtom()); - state.fragManager.incorporateFragment(acidReplacingFrag, oxygenAtoms.get(0).getFrag()); - removeAssociatedFunctionalAtom(oxygenAtoms.get(0)); + Atom chosenOxygen = locant != null ? 
removeOxygenWithAppropriateLocant(oxygenAtoms, locant) : oxygenAtoms.get(0); + state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(chosenOxygen, acidReplacingFrag.getFirstAtom()); + removeAssociatedFunctionalAtom(chosenOxygen); } /** * Replaces the appropriate number of functional oxygen atoms with the corresponding fragment - * @param state * @param acidContainingRoot * @param functionalWord * @throws ComponentGenerationException * @throws StructureBuildingException */ - private static void processAcidReplacingFunctionalClassNomenclatureFunctionalWord(BuildState state, Element acidContainingRoot, Element functionalWord) throws ComponentGenerationException, StructureBuildingException { + private void processAcidReplacingFunctionalClassNomenclatureFunctionalWord(Element acidContainingRoot, Element functionalWord) throws ComponentGenerationException, StructureBuildingException { if (functionalWord !=null && functionalWord.getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())){ Element functionalTerm = functionalWord.getFirstChildElement(FUNCTIONALTERM_EL); if (functionalTerm ==null){ @@ -455,28 +469,38 @@ } Element acidReplacingGroup = functionalTerm.getFirstChildElement(FUNCTIONALGROUP_EL); String functionalGroupName = acidReplacingGroup.getValue(); - Element possibleMultiplier = (Element) XOMTools.getPreviousSibling(acidReplacingGroup); + Element possibleLocantOrMultiplier = OpsinTools.getPreviousSibling(acidReplacingGroup); int numberOfAcidicHydroxysToReplace = 1; - if (possibleMultiplier!=null){ - if (!possibleMultiplier.getLocalName().equals(MULTIPLIER_EL)){ - throw new ComponentGenerationException("OPSIN bug: non multiplier found where only a multiplier was expected in acidReplacingFunctionalGroup wordRule"); + String[] locants = null; + if (possibleLocantOrMultiplier != null){ + if (possibleLocantOrMultiplier.getName().equals(MULTIPLIER_EL)){ + numberOfAcidicHydroxysToReplace = 
Integer.parseInt(possibleLocantOrMultiplier.getAttributeValue(VALUE_ATR)); + possibleLocantOrMultiplier.detach(); + possibleLocantOrMultiplier = OpsinTools.getPreviousSibling(acidReplacingGroup); + } + if (possibleLocantOrMultiplier != null){ + if (possibleLocantOrMultiplier.getName().equals(LOCANT_EL)){ + locants = StringTools.removeDashIfPresent(possibleLocantOrMultiplier.getValue()).split(","); + possibleLocantOrMultiplier.detach(); + } + else { + throw new ComponentGenerationException("Unexpected qualifier to acidReplacingFunctionalGroup functionalTerm"); + } } - numberOfAcidicHydroxysToReplace = Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR)); - possibleMultiplier.detach(); } - if (functionalTerm.getChildElements().size()!=1){ + if (functionalTerm.getChildCount() != 1){ throw new ComponentGenerationException("Unexpected qualifier to acidReplacingFunctionalGroup functionalTerm"); } Element groupToBeModified = acidContainingRoot.getFirstChildElement(GROUP_EL); - List oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(state, groupToBeModified); - if (oxygenAtoms.size()==0){ - oxygenAtoms = findFunctionalOxygenAtomsInGroup(state, groupToBeModified); + List oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified); + if (oxygenAtoms.size()==0) { + oxygenAtoms = findFunctionalOxygenAtomsInGroup(groupToBeModified); } - if (oxygenAtoms.size()==0){ - List conjunctiveSuffixElements =XOMTools.getNextSiblingsOfType(groupToBeModified, CONJUNCTIVESUFFIXGROUP_EL); + if (oxygenAtoms.size()==0) { + List conjunctiveSuffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, CONJUNCTIVESUFFIXGROUP_EL); for (Element conjunctiveSuffixElement : conjunctiveSuffixElements) { - oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(state, conjunctiveSuffixElement)); + oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(conjunctiveSuffixElement)); } } if (numberOfAcidicHydroxysToReplace > oxygenAtoms.size()){ @@ -485,27 +509,32 @@ 
boolean isAmide = functionalGroupName.equals("amide") || functionalGroupName.equals("amid"); if (isAmide) { for (int i = 0; i < numberOfAcidicHydroxysToReplace; i++) { - removeAssociatedFunctionalAtom(oxygenAtoms.get(i)); - oxygenAtoms.get(i).setElement("N"); + Atom functionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[i]) : oxygenAtoms.get(i); + removeAssociatedFunctionalAtom(functionalOxygenToReplace); + functionalOxygenToReplace.setElement(ChemEl.N); } } else{ - Fragment acidReplacingFrag = ComponentProcessor.resolveGroup(state, acidReplacingGroup); - Fragment acidFragment = state.xmlFragmentMap.get(groupToBeModified); + String groupValue = acidReplacingGroup.getAttributeValue(VALUE_ATR); + String labelsValue = acidReplacingGroup.getAttributeValue(LABELS_ATR); + Fragment acidReplacingFrag = state.fragManager.buildSMILES(groupValue, SUFFIX_TYPE_VAL, labelsValue != null ? labelsValue : NONE_LABELS_VAL); + Fragment acidFragment = groupToBeModified.getFrag(); if (acidFragment.hasLocant("2")){//prefer numeric locants on group to those of replacing group for (Atom atom : acidReplacingFrag.getAtomList()) { atom.clearLocants(); } } - state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(oxygenAtoms.get(0), acidReplacingFrag.getFirstAtom()); - removeAssociatedFunctionalAtom(oxygenAtoms.get(0)); + Atom firstFunctionalOxygenToReplace = locants != null ? 
removeOxygenWithAppropriateLocant(oxygenAtoms, locants[0]) : oxygenAtoms.get(0); + state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(firstFunctionalOxygenToReplace, acidReplacingFrag.getFirstAtom()); + removeAssociatedFunctionalAtom(firstFunctionalOxygenToReplace); for (int i = 1; i < numberOfAcidicHydroxysToReplace; i++) { Fragment clonedHydrazide = state.fragManager.copyAndRelabelFragment(acidReplacingFrag, i); - state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(oxygenAtoms.get(i), clonedHydrazide.getFirstAtom()); - state.fragManager.incorporateFragment(clonedHydrazide, oxygenAtoms.get(i).getFrag()); - removeAssociatedFunctionalAtom(oxygenAtoms.get(i)); + Atom functionalOxygenToReplace = locants != null ? removeOxygenWithAppropriateLocant(oxygenAtoms, locants[i]) : oxygenAtoms.get(i); + state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(functionalOxygenToReplace, clonedHydrazide.getFirstAtom()); + state.fragManager.incorporateFragment(clonedHydrazide, functionalOxygenToReplace.getFrag()); + removeAssociatedFunctionalAtom(functionalOxygenToReplace); } - state.fragManager.incorporateFragment(acidReplacingFrag, oxygenAtoms.get(0).getFrag()); + state.fragManager.incorporateFragment(acidReplacingFrag, firstFunctionalOxygenToReplace.getFrag()); } } else{ @@ -513,16 +542,35 @@ } } + private Atom removeOxygenWithAppropriateLocant(List oxygenAtoms, String locant) throws ComponentGenerationException { + for (Iterator iterator = oxygenAtoms.iterator(); iterator.hasNext();) { + Atom atom = iterator.next(); + if (atom.hasLocant(locant)) { + iterator.remove(); + return atom; + } + } + //Look for the case whether the locant refers to the backbone + for (Iterator iterator = oxygenAtoms.iterator(); iterator.hasNext();) { + Atom atom = iterator.next(); + if (OpsinTools.depthFirstSearchForNonSuffixAtomWithLocant(atom, locant) != null){ + iterator.remove(); + return atom; + } + } + throw new ComponentGenerationException("Failed to 
find acid group at locant: " + locant); + } + /* * Prefix functional replacement nomenclature */ - private static boolean acidHasSufficientHydrogenForSubstitutionInterpretation(Fragment acidFrag, int hydrogenRequiredForSubstitutionInterpretation, Element locantEl) throws StructureBuildingException { + private boolean acidHasSufficientHydrogenForSubstitutionInterpretation(Fragment acidFrag, int hydrogenRequiredForSubstitutionInterpretation, Element locantEl) { List atomsThatWouldBeSubstituted = new ArrayList(); if (locantEl !=null){ - String[] possibleLocants = MATCH_COMMA.split(locantEl.getValue()); + String[] possibleLocants = locantEl.getValue().split(","); for (String locant : possibleLocants) { Atom atomToBeSubstituted = acidFrag.getAtomByLocant(locant); if (atomToBeSubstituted !=null){ @@ -530,13 +578,13 @@ } else{ atomsThatWouldBeSubstituted.clear(); - atomsThatWouldBeSubstituted.add(acidFrag.getDefaultInAtom()); + atomsThatWouldBeSubstituted.add(acidFrag.getDefaultInAtomOrFirstAtom()); break; } } } else{ - atomsThatWouldBeSubstituted.add(acidFrag.getDefaultInAtom()); + atomsThatWouldBeSubstituted.add(acidFrag.getDefaultInAtomOrFirstAtom()); } for (Atom atom : atomsThatWouldBeSubstituted) { if (StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(atom) < hydrogenRequiredForSubstitutionInterpretation){ @@ -549,7 +597,6 @@ /** * Performs replacement of oxygen atoms by chalogen atoms * If this is ambiguous e.g. 
thioacetate then Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT is populated - * @param state * @param groupToBeModified * @param locantEl * @param numberOfAtomsToReplace @@ -557,15 +604,15 @@ * @return * @throws StructureBuildingException */ - private static int performChalcogenFunctionalReplacement(BuildState state, Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles) throws StructureBuildingException { - List oxygenAtoms = findOxygenAtomsInApplicableSuffixes(state, groupToBeModified); - if (oxygenAtoms.size()==0){ - oxygenAtoms = findOxygenAtomsInGroup(state, groupToBeModified); + private int performChalcogenFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles) throws StructureBuildingException { + List oxygenAtoms = findOxygenAtomsInApplicableSuffixes(groupToBeModified); + if (oxygenAtoms.size() == 0) { + oxygenAtoms = findOxygenAtomsInGroup(groupToBeModified); } - if (locantEl !=null){//locants are used to indicate replacement on trivial groups + if (locantEl != null) {//locants are used to indicate replacement on trivial groups List oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms); - if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace){ - numberOfAtomsToReplace =1; + if(oxygenWithAppropriateLocants.size() < numberOfAtomsToReplace) { + numberOfAtomsToReplace = 1; //e.g. 
-1-thioureidomethyl } else{ @@ -579,7 +626,7 @@ List ethericOxygen = new ArrayList(); for (Atom oxygen : oxygenAtoms) { int incomingValency = oxygen.getIncomingValency(); - int bondCount = oxygen.getBonds().size(); + int bondCount = oxygen.getBondCount(); if (bondCount==1 && incomingValency==2){ doubleBondedOxygen.add(oxygen); } @@ -590,7 +637,7 @@ ethericOxygen.add(oxygen); } } - List replaceableAtoms = new LinkedList(); + List replaceableAtoms = new ArrayList(); replaceableAtoms.addAll(doubleBondedOxygen); replaceableAtoms.addAll(singleBondedOxygen); replaceableAtoms.addAll(ethericOxygen); @@ -604,7 +651,7 @@ int atomsReplaced =0; if (totalOxygen >=numberOfAtomsToReplace){//check that there atleast as many oxygens as requested replacements boolean prefixAssignmentAmbiguous =false; - Set ambiguousElementAtoms = new HashSet(); + Set ambiguousElementAtoms = new LinkedHashSet(); if (totalOxygen != numberOfAtomsToReplace){ prefixAssignmentAmbiguous=true; } @@ -636,18 +683,17 @@ /** * Converts functional oxygen to peroxy e.g. 
peroxybenzoic acid * Returns the number of oxygen replaced - * @param state * @param groupToBeModified * @param locantEl * @param numberOfAtomsToReplace * @return * @throws StructureBuildingException */ - private static int performPeroxyFunctionalReplacement(BuildState state, Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace) throws StructureBuildingException { - List oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(state, groupToBeModified); + private int performPeroxyFunctionalReplacement(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace) throws StructureBuildingException { + List oxygenAtoms = findFunctionalOxygenAtomsInApplicableSuffixes(groupToBeModified); if (oxygenAtoms.size()==0){ - oxygenAtoms = findEthericOxygenAtomsInGroup(state, groupToBeModified); - oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(state, groupToBeModified)); + oxygenAtoms = findEthericOxygenAtomsInGroup(groupToBeModified); + oxygenAtoms.addAll(findFunctionalOxygenAtomsInGroup(groupToBeModified)); } if (locantEl !=null){ List oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms); @@ -667,20 +713,20 @@ atomsReplaced = numberOfAtomsToReplace; for (int j = 0; j < numberOfAtomsToReplace; j++) { Atom oxygenToReplace = oxygenAtoms.get(j); - if (oxygenToReplace.getBonds().size()==2){//etheric oxygen + if (oxygenToReplace.getBondCount()==2){//etheric oxygen Fragment newOxygen = state.fragManager.buildSMILES("O", SUFFIX_TYPE_VAL, NONE_LABELS_VAL); Bond bondToRemove = oxygenToReplace.getFirstBond(); Atom atomToAttachTo = bondToRemove.getFromAtom() == oxygenToReplace ? 
bondToRemove.getToAtom() : bondToRemove.getFromAtom(); state.fragManager.createBond(atomToAttachTo, newOxygen.getFirstAtom(), 1); state.fragManager.createBond(newOxygen.getFirstAtom(), oxygenToReplace, 1); state.fragManager.removeBond(bondToRemove); - state.fragManager.incorporateFragment(newOxygen, state.xmlFragmentMap.get(groupToBeModified)); + state.fragManager.incorporateFragment(newOxygen, groupToBeModified.getFrag()); } else{ Fragment replacementFrag = state.fragManager.buildSMILES("OO", SUFFIX_TYPE_VAL, NONE_LABELS_VAL); removeOrMoveObsoleteFunctionalAtoms(oxygenToReplace, replacementFrag); state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(oxygenToReplace, replacementFrag.getFirstAtom()); - state.fragManager.incorporateFragment(replacementFrag, state.xmlFragmentMap.get(groupToBeModified)); + state.fragManager.incorporateFragment(replacementFrag, groupToBeModified.getFrag()); } } } @@ -692,7 +738,6 @@ * SMILES with a valency 1 outAtom replace -O, SMILES with a valency 2 outAtom replace =O * SMILES with a valency 3 outAtom replace -O and =O (nitrido) * Returns the number of oxygen replaced - * @param state * @param groupToBeModified * @param locantEl * @param numberOfAtomsToReplace @@ -700,7 +745,7 @@ * @return * @throws StructureBuildingException */ - private static int performFunctionalReplacementOnAcid(BuildState state, Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles) throws StructureBuildingException { + private int performFunctionalReplacementOnAcid(Element groupToBeModified, Element locantEl, int numberOfAtomsToReplace, String replacementSmiles) throws StructureBuildingException { int outValency; if (replacementSmiles.startsWith("-")){ outValency =1; @@ -715,14 +760,14 @@ throw new StructureBuildingException("OPSIN bug: Unexpected valency on fragment for prefix functional replacement"); } replacementSmiles = replacementSmiles.substring(1); - List oxygenAtoms = 
findOxygenAtomsInApplicableSuffixes(state, groupToBeModified); + List oxygenAtoms = findOxygenAtomsInApplicableSuffixes(groupToBeModified); if (oxygenAtoms.size()==0){ - oxygenAtoms = findOxygenAtomsInGroup(state, groupToBeModified); + oxygenAtoms = findOxygenAtomsInGroup(groupToBeModified); } if (locantEl !=null){//locants are used to indicate replacement on trivial groups List oxygenWithAppropriateLocants = pickOxygensWithAppropriateLocants(locantEl, oxygenAtoms); - LinkedList singleBondedOxygen =new LinkedList(); - LinkedList terminalDoubleBondedOxygen =new LinkedList(); + List singleBondedOxygen = new ArrayList(); + List terminalDoubleBondedOxygen = new ArrayList(); populateTerminalSingleAndDoubleBondedOxygen(oxygenWithAppropriateLocants, singleBondedOxygen, terminalDoubleBondedOxygen); if (outValency ==1){ oxygenWithAppropriateLocants.removeAll(terminalDoubleBondedOxygen); @@ -739,8 +784,8 @@ oxygenAtoms = oxygenWithAppropriateLocants; } } - LinkedList singleBondedOxygen =new LinkedList(); - LinkedList terminalDoubleBondedOxygen =new LinkedList(); + List singleBondedOxygen = new ArrayList(); + List terminalDoubleBondedOxygen = new ArrayList(); populateTerminalSingleAndDoubleBondedOxygen(oxygenAtoms, singleBondedOxygen, terminalDoubleBondedOxygen); if (outValency ==1){ oxygenAtoms.removeAll(terminalDoubleBondedOxygen); @@ -768,10 +813,10 @@ continue; } else{ - Fragment replacementFrag = state.fragManager.buildSMILES(replacementSmiles, atomToReplace.getFrag().getType(), NONE_LABELS_VAL); + Fragment replacementFrag = state.fragManager.buildSMILES(replacementSmiles, atomToReplace.getFrag().getTokenEl(), NONE_LABELS_VAL); if (outValency ==3){//special case for nitrido atomToReplace.getFirstBond().setOrder(3); - Atom removedHydroxy = singleBondedOxygen.removeFirst(); + Atom removedHydroxy = singleBondedOxygen.remove(0); state.fragManager.removeAtomAndAssociatedBonds(removedHydroxy); removeAssociatedFunctionalAtom(removedHydroxy); } @@ -796,23 +841,21 @@ * This block 
handles infix multiplication. Unless brackets are provided this is ambiguous without knowledge of the suffix that is being modified * For example butandithione could be intepreted as butandi(thione) or butan(dithi)one. * Obviously the latter is wrong in this case but it is the correct interpretation for butandithiate - * @param state * @param suffixes * @param suffixFragments * @param suffix - * @param suffixFrag * @param infixTransformations * @param oxygenAvailable * @throws ComponentGenerationException * @throws StructureBuildingException */ - private static void disambiguateMultipliedInfixMeaning(BuildState state,List suffixes, - List suffixFragments,Element suffix, Fragment suffixFrag, List infixTransformations, int oxygenAvailable) + private void disambiguateMultipliedInfixMeaning(List suffixes, + List suffixFragments,Element suffix, List infixTransformations, int oxygenAvailable) throws ComponentGenerationException, StructureBuildingException { - Element possibleInfix =(Element) XOMTools.getPreviousSibling(suffix); - if (possibleInfix.getLocalName().equals(INFIX_EL)){//the infix is only left when there was ambiguity - Element possibleMultiplier =(Element) XOMTools.getPreviousSibling(possibleInfix); - if (possibleMultiplier.getLocalName().equals(MULTIPLIER_EL)){ + Element possibleInfix = OpsinTools.getPreviousSibling(suffix); + if (possibleInfix.getName().equals(INFIX_EL)){//the infix is only left when there was ambiguity + Element possibleMultiplier = OpsinTools.getPreviousSibling(possibleInfix); + if (possibleMultiplier.getName().equals(MULTIPLIER_EL)){ int multiplierValue =Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR)); if (infixTransformations.size() + multiplierValue-1 <=oxygenAvailable){//multiplier means multiply the infix e.g. 
butandithiate for (int j = 1; j < multiplierValue; j++) { @@ -820,10 +863,10 @@ } } else{ - Element possibleLocant =(Element)XOMTools.getPreviousSibling(possibleMultiplier); + Element possibleLocant = OpsinTools.getPreviousSibling(possibleMultiplier); String[] locants = null; - if (possibleLocant.getLocalName().equals(LOCANT_EL)) { - locants = MATCH_COMMA.split(possibleLocant.getValue()); + if (possibleLocant.getName().equals(LOCANT_EL)) { + locants = possibleLocant.getValue().split(","); } if (locants !=null){ if (locants.length!=multiplierValue){ @@ -833,11 +876,11 @@ } suffix.addAttribute(new Attribute(MULTIPLIED_ATR, "multiplied")); for (int j = 1; j < multiplierValue; j++) {//multiplier means multiply the infixed suffix e.g. butandithione - Element newSuffix =new Element(suffix); - Fragment newSuffixFrag =state.fragManager.copyFragment(suffixFrag); - state.xmlFragmentMap.put(newSuffix, newSuffixFrag); + Element newSuffix = suffix.copy(); + Fragment newSuffixFrag = state.fragManager.copyFragment(suffix.getFrag()); + newSuffix.setFrag(newSuffixFrag); suffixFragments.add(newSuffixFrag); - XOMTools.insertAfter(suffix, newSuffix); + OpsinTools.insertAfter(suffix, newSuffix); suffixes.add(newSuffix); if (locants !=null){//assign locants if available newSuffix.getAttribute(LOCANT_ATR).setValue(locants[j]); @@ -868,7 +911,7 @@ * @param atomToBeReplaced * @param replacementFrag */ - private static void removeOrMoveObsoleteFunctionalAtoms(Atom atomToBeReplaced, Fragment replacementFrag){ + private void removeOrMoveObsoleteFunctionalAtoms(Atom atomToBeReplaced, Fragment replacementFrag){ List replacementAtomList = replacementFrag.getAtomList(); Fragment origFrag = atomToBeReplaced.getFrag(); for (int i = origFrag.getFunctionalAtomCount() - 1; i >=0; i--) { @@ -876,7 +919,7 @@ if (atomToBeReplaced.equals(functionalAtom.getAtom())){ atomToBeReplaced.getFrag().removeFunctionalAtom(i); Atom terminalAtomOfReplacementFrag = 
replacementAtomList.get(replacementAtomList.size()-1); - if ((terminalAtomOfReplacementFrag.getIncomingValency() ==1 || replacementAtomList.size()==1)&& matchChalcogen.matcher(terminalAtomOfReplacementFrag.getElement()).matches()){ + if ((terminalAtomOfReplacementFrag.getIncomingValency() ==1 || replacementAtomList.size()==1)&& terminalAtomOfReplacementFrag.getElement().isChalcogen()){ replacementFrag.addFunctionalAtom(terminalAtomOfReplacementFrag); terminalAtomOfReplacementFrag.setCharge(atomToBeReplaced.getCharge()); terminalAtomOfReplacementFrag.setProtonsExplicitlyAddedOrRemoved(atomToBeReplaced.getProtonsExplicitlyAddedOrRemoved()); @@ -893,7 +936,7 @@ * @param atomToBeReplaced * @param replacementFrag */ - private static void moveObsoleteOutAtoms(Atom atomToBeReplaced, Fragment replacementFrag){ + private void moveObsoleteOutAtoms(Atom atomToBeReplaced, Fragment replacementFrag){ if (atomToBeReplaced.getOutValency() >0){//this is not known to occur in well formed IUPAC names but would occur in thioxy (as a suffix) List replacementAtomList = replacementFrag.getAtomList(); Fragment origFrag = atomToBeReplaced.getFrag(); @@ -908,7 +951,7 @@ } } - private static void removeAssociatedFunctionalAtom(Atom atomWithFunctionalAtom) throws StructureBuildingException { + private void removeAssociatedFunctionalAtom(Atom atomWithFunctionalAtom) throws StructureBuildingException { Fragment frag = atomWithFunctionalAtom.getFrag(); for (int i = frag.getFunctionalAtomCount() - 1; i >=0; i--) { FunctionalAtom functionalAtom = frag.getFunctionalAtom(i); @@ -928,25 +971,35 @@ * @param oxygenAtoms * @return */ - private static List pickOxygensWithAppropriateLocants(Element locantEl, List oxygenAtoms) { - String[] possibleLocants = MATCH_COMMA.split(locantEl.getValue()); + private List pickOxygensWithAppropriateLocants(Element locantEl, List oxygenAtoms) { + String[] possibleLocants = locantEl.getValue().split(","); + + boolean pLocantSpecialCase = (possibleLocants.length == 1 && 
possibleLocants[0].equals("P")); List oxygenWithAppropriateLocants = new ArrayList(); for (Atom atom : oxygenAtoms) { List atomlocants = atom.getLocants(); - if (atomlocants.size()>0){ + if (atomlocants.size() > 0) { for (String locantVal : possibleLocants) { - if (atomlocants.contains(locantVal)){ + if (atomlocants.contains(locantVal)) { oxygenWithAppropriateLocants.add(atom); break; } } } - else{ + else if (pLocantSpecialCase) { + for (Atom neighbour : atom.getAtomNeighbours()) { + if (neighbour.getElement() == ChemEl.P) { + oxygenWithAppropriateLocants.add(atom); + break; + } + } + } + else { Atom atomWithNumericLocant = OpsinTools.depthFirstSearchForAtomWithNumericLocant(atom); - if (atomWithNumericLocant!=null){ + if (atomWithNumericLocant != null) { List atomWithNumericLocantLocants = atomWithNumericLocant.getLocants(); for (String locantVal : possibleLocants) { - if (atomWithNumericLocantLocants.contains(locantVal)){ + if (atomWithNumericLocantLocants.contains(locantVal)) { oxygenWithAppropriateLocants.add(atom); break; } @@ -959,19 +1012,18 @@ /** * Returns oxygen atoms in suffixes with functionalAtoms - * @param state * @param groupToBeModified * @return */ - private static List findFunctionalOxygenAtomsInApplicableSuffixes(BuildState state, Element groupToBeModified) { - List suffixElements =XOMTools.getNextSiblingsOfType(groupToBeModified, SUFFIX_EL); + private List findFunctionalOxygenAtomsInApplicableSuffixes(Element groupToBeModified) { + List suffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, SUFFIX_EL); List oxygenAtoms = new ArrayList(); for (Element suffix : suffixElements) { - Fragment suffixFrag = state.xmlFragmentMap.get(suffix); + Fragment suffixFrag = suffix.getFrag(); if (suffixFrag != null) {//null for non carboxylic acids for (int i = 0, l = suffixFrag.getFunctionalAtomCount(); i < l; i++) { Atom a = suffixFrag.getFunctionalAtom(i).getAtom(); - if (a.getElement().equals("O")) { + if (a.getElement() == ChemEl.O) { 
oxygenAtoms.add(a); } } @@ -982,16 +1034,15 @@ /** * Returns functional oxygen atoms in groupToBeModified - * @param state * @param groupToBeModified * @return */ - private static List findFunctionalOxygenAtomsInGroup(BuildState state, Element groupToBeModified) { + private List findFunctionalOxygenAtomsInGroup(Element groupToBeModified) { List oxygenAtoms = new ArrayList(); - Fragment frag = state.xmlFragmentMap.get(groupToBeModified); + Fragment frag = groupToBeModified.getFrag(); for (int i = 0, l = frag.getFunctionalAtomCount(); i < l; i++) { Atom a = frag.getFunctionalAtom(i).getAtom(); - if (a.getElement().equals("O")){ + if (a.getElement() == ChemEl.O){ oxygenAtoms.add(a); } } @@ -1001,15 +1052,14 @@ /** * Returns etheric oxygen atoms in groupToBeModified - * @param state * @param groupToBeModified * @return */ - private static List findEthericOxygenAtomsInGroup(BuildState state, Element groupToBeModified) { + private List findEthericOxygenAtomsInGroup(Element groupToBeModified) { List oxygenAtoms = new ArrayList(); - List atomList = state.xmlFragmentMap.get(groupToBeModified).getAtomList(); + List atomList = groupToBeModified.getFrag().getAtomList(); for (Atom a: atomList) { - if (a.getElement().equals("O") && a.getBonds().size()==2 && a.getCharge()==0 && a.getIncomingValency()==2){ + if (a.getElement() == ChemEl.O && a.getBondCount()==2 && a.getCharge()==0 && a.getIncomingValency()==2){ oxygenAtoms.add(a); } } @@ -1019,20 +1069,19 @@ /** * Returns oxygen atoms in suffixes with functionalAtoms or acidStem suffixes or aldehyde suffixes (1979 C-531) - * @param state * @param groupToBeModified * @return */ - private static List findOxygenAtomsInApplicableSuffixes(BuildState state, Element groupToBeModified) { - List suffixElements =XOMTools.getNextSiblingsOfType(groupToBeModified, SUFFIX_EL); + private List findOxygenAtomsInApplicableSuffixes(Element groupToBeModified) { + List suffixElements =OpsinTools.getNextSiblingsOfType(groupToBeModified, SUFFIX_EL); 
List oxygenAtoms = new ArrayList(); for (Element suffix : suffixElements) { - Fragment suffixFrag = state.xmlFragmentMap.get(suffix); + Fragment suffixFrag = suffix.getFrag(); if (suffixFrag != null) {//null for non carboxylic acids if (suffixFrag.getFunctionalAtomCount() > 0 || groupToBeModified.getAttributeValue(TYPE_ATR).equals(ACIDSTEM_TYPE_VAL) || suffix.getAttributeValue(VALUE_ATR).equals("aldehyde")) { List atomList = suffixFrag.getAtomList(); for (Atom a : atomList) { - if (a.getElement().equals("O")) { + if (a.getElement() == ChemEl.O) { oxygenAtoms.add(a); } } @@ -1044,15 +1093,14 @@ /** * Returns oxygen atoms in groupToBeModified - * @param state * @param groupToBeModified * @return */ - private static List findOxygenAtomsInGroup(BuildState state, Element groupToBeModified) { + private List findOxygenAtomsInGroup(Element groupToBeModified) { List oxygenAtoms = new ArrayList(); - List atomList = state.xmlFragmentMap.get(groupToBeModified).getAtomList(); + List atomList = groupToBeModified.getFrag().getAtomList(); for (Atom a : atomList) { - if (a.getElement().equals("O")){ + if (a.getElement() == ChemEl.O){ oxygenAtoms.add(a); } } @@ -1060,10 +1108,10 @@ } - private static void populateTerminalSingleAndDoubleBondedOxygen(List atomList, LinkedList singleBondedOxygen,LinkedList doubleBondedOxygen) throws StructureBuildingException { + private void populateTerminalSingleAndDoubleBondedOxygen(List atomList, List singleBondedOxygen, List doubleBondedOxygen) throws StructureBuildingException { for (Atom a : atomList) { - if (a.getElement().equals("O")){//find terminal oxygens - if (a.getBonds().size()==1){ + if (a.getElement() == ChemEl.O){//find terminal oxygens + if (a.getBondCount()==1){ int incomingValency = a.getIncomingValency(); if (incomingValency ==2){ doubleBondedOxygen.add(a); diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingBuilder.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingBuilder.java 
--- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingBuilder.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingBuilder.java 2017-07-23 20:55:18.000000000 +0000 @@ -6,17 +6,14 @@ import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; -import java.util.LinkedList; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; -import nu.xom.Element; -import nu.xom.Elements; - /** * Assembles fused rings named using fusion nomenclature * @author dl387 @@ -28,13 +25,13 @@ private final Element lastGroup; private final Fragment parentRing; private final Map fragmentInScopeForEachFusionLevel = new HashMap(); - private Map atomsToRemoveToReplacementAtom = new HashMap(); + private final Map atomsToRemoveToReplacementAtom = new HashMap(); private FusedRingBuilder(BuildState state, List groupsInFusedRing) { this.state = state; this.groupsInFusedRing = groupsInFusedRing; lastGroup = groupsInFusedRing.get(groupsInFusedRing.size()-1); - parentRing = state.xmlFragmentMap.get(lastGroup); + parentRing = lastGroup.getFrag(); fragmentInScopeForEachFusionLevel.put(0, parentRing); } @@ -45,7 +42,7 @@ * @throws StructureBuildingException */ static void processFusedRings(BuildState state, Element subOrRoot) throws StructureBuildingException { - List groups = XOMTools.getChildElementsWithTagName(subOrRoot, GROUP_EL); + List groups = subOrRoot.getChildElements(GROUP_EL); if (groups.size() < 2){ return;//nothing to fuse } @@ -56,12 +53,12 @@ if (i!=0){ Element startingEl = group; if ((group.getValue().equals("benz") || group.getValue().equals("benzo")) && FUSIONRING_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ - Element beforeBenzo = (Element) XOMTools.getPreviousSibling(group); - if (beforeBenzo !=null && 
beforeBenzo.getLocalName().equals(LOCANT_EL)){ + Element beforeBenzo = OpsinTools.getPreviousSibling(group); + if (beforeBenzo !=null && beforeBenzo.getName().equals(LOCANT_EL)){ startingEl = beforeBenzo; } } - Element possibleGroup = XOMTools.getPreviousSiblingIgnoringCertainElements(startingEl, new String[]{MULTIPLIER_EL, FUSION_EL}); + Element possibleGroup = OpsinTools.getPreviousSiblingIgnoringCertainElements(startingEl, new String[]{MULTIPLIER_EL, FUSION_EL}); if (!groups.get(i-1).equals(possibleGroup)){//end of fused ring system if (groupsInFusedRing.size()>=2){ //This will be invoked in cases where there are multiple fused ring systems in the same subOrRoot such as some spiro systems @@ -96,8 +93,8 @@ parentFragments.add(parentRing); int numberOfParents = 1; - Element possibleMultiplier = (Element) XOMTools.getPreviousSibling(lastGroup); - if (nameComponents.size()>0 && possibleMultiplier !=null && possibleMultiplier.getLocalName().equals(MULTIPLIER_EL)){ + Element possibleMultiplier = OpsinTools.getPreviousSibling(lastGroup); + if (nameComponents.size()>0 && possibleMultiplier !=null && possibleMultiplier.getName().equals(MULTIPLIER_EL)){ numberOfParents = Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR)); possibleMultiplier.detach(); for (int j = 1; j < numberOfParents; j++) { @@ -120,33 +117,33 @@ int fusionLevel = (nameComponents.size()-1 -ncIndice)/2; for (; ncIndice>=0; ncIndice--) { Element fusion = null; - if (nameComponents.get(ncIndice).getLocalName().equals(FUSION_EL)){ + if (nameComponents.get(ncIndice).getName().equals(FUSION_EL)){ fusion = nameComponents.get(ncIndice--); } - if (ncIndice <0 || !nameComponents.get(ncIndice).getLocalName().equals(GROUP_EL)){ + if (ncIndice <0 || !nameComponents.get(ncIndice).getName().equals(GROUP_EL)){ throw new StructureBuildingException("Group not found where group expected. 
This is probably a bug"); } - Fragment nextComponent = state.xmlFragmentMap.get(nameComponents.get(ncIndice)); + Fragment nextComponent = nameComponents.get(ncIndice).getFrag(); int multiplier = 1; - Element possibleMultiplierEl = (Element) XOMTools.getPreviousSibling(nameComponents.get(ncIndice));//e.g. the di of difuro - if (possibleMultiplierEl != null && possibleMultiplierEl.getLocalName().equals(MULTIPLIER_EL)){ + Element possibleMultiplierEl = OpsinTools.getPreviousSibling(nameComponents.get(ncIndice));//e.g. the di of difuro + if (possibleMultiplierEl != null && possibleMultiplierEl.getName().equals(MULTIPLIER_EL)){ multiplier = Integer.parseInt(possibleMultiplierEl.getAttributeValue(VALUE_ATR)); } String[] fusionDescriptors =null; if (fusion !=null){ - String fusionDescriptorString = fusion.getValue().toLowerCase().substring(1, fusion.getValue().length()-1); + String fusionDescriptorString = fusion.getValue().toLowerCase(Locale.ROOT).substring(1, fusion.getValue().length()-1); if (multiplier ==1){ fusionDescriptors = new String[]{fusionDescriptorString}; } else{ - if (MATCH_SEMICOLON.split(fusionDescriptorString).length >1){ - fusionDescriptors = MATCH_SEMICOLON.split(fusionDescriptorString); + if (fusionDescriptorString.split(";").length >1){ + fusionDescriptors = fusionDescriptorString.split(";"); } - else if (MATCH_COLON.split(fusionDescriptorString).length >1){ - fusionDescriptors = MATCH_COLON.split(fusionDescriptorString); + else if (fusionDescriptorString.split(":").length >1){ + fusionDescriptors = fusionDescriptorString.split(":"); } - else if (MATCH_COMMA.split(fusionDescriptorString).length >1){ - fusionDescriptors = MATCH_COMMA.split(fusionDescriptorString); + else if (fusionDescriptorString.split(",").length >1){ + fusionDescriptors = fusionDescriptorString.split(","); } else{//multiplier does not appear to mean multiplied component. 
Could be indicating multiplication of the whole fused ring system if (ncIndice!=0){ @@ -174,13 +171,13 @@ Fragment component = fusionComponents[j]; componentFragments.add(component); if (fusion !=null){ - if (MATCH_COLON.split(fusionDescriptors[j]).length==1){//A fusion bracket without a colon is used when applying to the parent component (except in a special case where locants are ommitted) + if (fusionDescriptors[j].split(":").length==1){//A fusion bracket without a colon is used when applying to the parent component (except in a special case where locants are ommitted) //check for case of omitted locant from a higher order fusion bracket e.g. cyclopenta[4,5]pyrrolo[2,3-c]pyridine - if (MATCH_DASH.split(fusionDescriptors[j]).length==1 && - MATCH_COMMA.split(fusionDescriptors[j]).length >1 && + if (fusionDescriptors[j].split("-").length==1 && + fusionDescriptors[j].split(",").length >1 && FragmentTools.allAtomsInRingAreIdentical(component) - && ((StringTools.countTerminalPrimes(MATCH_COMMA.split(fusionDescriptors[j])[0])) != fusionLevel) ){//Could be like cyclopenta[3,4]cyclobuta[1,2]benzene where the first fusion to occur has parent locants omitted not child locants - int numberOfPrimes = StringTools.countTerminalPrimes(MATCH_COMMA.split(fusionDescriptors[j])[0]); + && ((StringTools.countTerminalPrimes(fusionDescriptors[j].split(",")[0])) != fusionLevel) ){//Could be like cyclopenta[3,4]cyclobuta[1,2]benzene where the first fusion to occur has parent locants omitted not child locants + int numberOfPrimes = StringTools.countTerminalPrimes(fusionDescriptors[j].split(",")[0]); //note that this is the number of primes on the parent ring. So would expect the child ring and hence the fusionLevel to be 1 higher if (numberOfPrimes + 1 != fusionLevel){ if (numberOfPrimes + 2 == fusionLevel){//ring could be in previous fusion level e.g. 
the benzo in benzo[10,11]phenanthro[2',3',4',5',6':4,5,6,7]chryseno[1,2,3-bc]coronene @@ -191,7 +188,7 @@ } } relabelAccordingToFusionLevel(component, fusionLevel); - List numericalLocantsOfParent = Arrays.asList(MATCH_COMMA.split(fusionDescriptors[j])); + List numericalLocantsOfParent = Arrays.asList(fusionDescriptors[j].split(",")); List numericalLocantsOfChild = findPossibleNumericalLocants(component, determineAtomsToFuse(fragmentInScopeForEachFusionLevel.get(fusionLevel), numericalLocantsOfParent, null).size()-1); processHigherOrderFusionDescriptors(component, fragmentInScopeForEachFusionLevel.get(fusionLevel), numericalLocantsOfChild, numericalLocantsOfParent); } @@ -218,7 +215,7 @@ } else{ //determine number of primes in fusor and hence determine fusion level - int numberOfPrimes = -j + StringTools.countTerminalPrimes(MATCH_COMMA.split(fusionDescriptors[j])[0]); + int numberOfPrimes = -j + StringTools.countTerminalPrimes(fusionDescriptors[j].split(",")[0]); if (numberOfPrimes != fusionLevel){ if (fusionLevel == numberOfPrimes +1){ fusionLevel--; @@ -256,10 +253,8 @@ Element fusedRingEl =lastGroup;//reuse this element to save having to remap suffixes... 
fusedRingEl.getAttribute(VALUE_ATR).setValue(fusedRingName.toString()); - fusedRingEl.removeAttribute(fusedRingEl.getAttribute(VALTYPE_ATR)); fusedRingEl.getAttribute(TYPE_ATR).setValue(RING_TYPE_VAL); - fusedRingEl.getAttribute(SUBTYPE_ATR).setValue(FUSEDRING_SUBTYPE_VAL); - XOMTools.setTextChild(fusedRingEl, fusedRingName.toString()); + fusedRingEl.setValue(fusedRingName.toString()); for (Element element : nameComponents) { element.detach(); @@ -281,17 +276,17 @@ List nameComponents = new ArrayList(); Element currentEl = groupsInFusedRing.get(0); while(currentEl != lastGroup){ - if (currentEl.getLocalName().equals(GROUP_EL) || currentEl.getLocalName().equals(FUSION_EL)){ + if (currentEl.getName().equals(GROUP_EL) || currentEl.getName().equals(FUSION_EL)){ nameComponents.add(currentEl); } - currentEl = (Element) XOMTools.getNextSibling(currentEl); + currentEl = OpsinTools.getNextSibling(currentEl); } return nameComponents; } private void processRingNumberingAndIrregularities() throws StructureBuildingException { for (Element group : groupsInFusedRing) { - Fragment ring = state.xmlFragmentMap.get(group); + Fragment ring = group.getFrag(); if (ALKANESTEM_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ aromatiseCyclicAlkane(group); } @@ -305,7 +300,7 @@ } } if (group.getAttribute(FUSEDRINGNUMBERING_ATR) != null) { - String[] standardNumbering = MATCH_SLASH.split(group.getAttributeValue(FUSEDRINGNUMBERING_ATR), -1); + String[] standardNumbering = group.getAttributeValue(FUSEDRINGNUMBERING_ATR).split("/", -1); for (int j = 0; j < standardNumbering.length; j++) { atomList.get(j).replaceLocants(standardNumbering[j]); } @@ -326,18 +321,21 @@ * This is necessary as this unsaturator can only refer to the HW ring and for names like 2-Benzoxazolinone to avoid confusion as to what the 2 refers to. 
* @param group * @param ring - * @throws StructureBuildingException */ - private void processPartiallyUnsaturatedHWSystems(Element group, Fragment ring) throws StructureBuildingException { + private void processPartiallyUnsaturatedHWSystems(Element group, Fragment ring) { if (HANTZSCHWIDMAN_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && group.getAttribute(ADDBOND_ATR)!=null){ - List unsaturators = XOMTools.getNextAdjacentSiblingsOfType(group, UNSATURATOR_EL); + List unsaturators = OpsinTools.getNextAdjacentSiblingsOfType(group, UNSATURATOR_EL); if (unsaturators.size()>0){ Element unsaturator = unsaturators.get(0); if (unsaturator.getAttribute(LOCANT_ATR)==null && unsaturator.getAttributeValue(VALUE_ATR).equals("2")){ unsaturator.detach(); - Bond bondToUnsaturate = StructureBuildingMethods.findBondToUnSaturate(ring.getAtomList(), 2, true); - bondToUnsaturate.getFromAtom().setSpareValency(true); - bondToUnsaturate.getToAtom().setSpareValency(true); + List bondsToUnsaturate = StructureBuildingMethods.findBondsToUnSaturate(ring, 2, true); + if (bondsToUnsaturate.size() == 0) { + throw new RuntimeException("Failed to find bond to unsaturate on partially saturated HW ring"); + } + Bond b = bondsToUnsaturate.get(0); + b.getFromAtom().setSpareValency(true); + b.getToAtom().setSpareValency(true); } } } @@ -352,11 +350,11 @@ * @param cyclicAlkaneGroup */ private void aromatiseCyclicAlkane(Element cyclicAlkaneGroup) { - Element next = (Element) XOMTools.getNextSibling(cyclicAlkaneGroup); + Element next = OpsinTools.getNextSibling(cyclicAlkaneGroup); List unsaturators = new ArrayList(); - while (next!=null && next.getLocalName().equals(UNSATURATOR_EL)){ + while (next!=null && next.getName().equals(UNSATURATOR_EL)){ unsaturators.add(next); - next = (Element) XOMTools.getNextSibling(next); + next = OpsinTools.getNextSibling(next); } boolean conjugate =true; if (unsaturators.size()==1){ @@ -387,7 +385,7 @@ for (Element unsaturator : unsaturators) { 
unsaturator.detach(); } - List atomList =state.xmlFragmentMap.get(cyclicAlkaneGroup).getAtomList(); + List atomList = cyclicAlkaneGroup.getFrag().getAtomList(); for (Atom atom : atomList) { atom.setSpareValency(true); } @@ -405,20 +403,20 @@ break; } Element fusion = null; - if (nameComponents.get(i).getLocalName().equals(FUSION_EL)){ + if (nameComponents.get(i).getName().equals(FUSION_EL)){ fusion = nameComponents.get(i--); } else{ throw new StructureBuildingException("Fusion bracket not found where fusion bracket expected"); } - if (i <0 || !nameComponents.get(i).getLocalName().equals(GROUP_EL)){ + if (i <0 || !nameComponents.get(i).getName().equals(GROUP_EL)){ throw new StructureBuildingException("Group not found where group expected. This is probably a bug"); } - Fragment nextComponent = state.xmlFragmentMap.get(nameComponents.get(i)); + Fragment nextComponent = nameComponents.get(i).getFrag(); relabelAccordingToFusionLevel(nextComponent, fusionLevel); int multiplier = 1; - Element possibleMultiplierEl = (Element) XOMTools.getPreviousSibling(nameComponents.get(i)); - if (possibleMultiplierEl != null && possibleMultiplierEl.getLocalName().equals(MULTIPLIER_EL)){ + Element possibleMultiplierEl = OpsinTools.getPreviousSibling(nameComponents.get(i)); + if (possibleMultiplierEl != null && possibleMultiplierEl.getName().equals(MULTIPLIER_EL)){ multiplier = Integer.parseInt(possibleMultiplierEl.getAttributeValue(VALUE_ATR)); possibleMultiplierEl.detach(); } @@ -437,16 +435,16 @@ if (multiplier>1 && multiplier != previousFusionLevelFragments.size()){ throw new StructureBuildingException("Mismatch between number of components and number of parents in fused ring system"); } - String fusionDescriptorString = fusion.getValue().toLowerCase().substring(1, fusion.getValue().length()-1); + String fusionDescriptorString = fusion.getValue().toLowerCase(Locale.ROOT).substring(1, fusion.getValue().length()-1); String[] fusionDescriptors =null; - if 
(MATCH_SEMICOLON.split(fusionDescriptorString).length >1){ - fusionDescriptors = MATCH_SEMICOLON.split(fusionDescriptorString); + if (fusionDescriptorString.split(";").length >1){ + fusionDescriptors = fusionDescriptorString.split(";"); } - else if (MATCH_COLON.split(fusionDescriptorString).length >1){ - fusionDescriptors = MATCH_COLON.split(fusionDescriptorString); + else if (fusionDescriptorString.split(":").length >1){ + fusionDescriptors = fusionDescriptorString.split(":"); } - else if (MATCH_COMMA.split(fusionDescriptorString).length >1){ - fusionDescriptors = MATCH_COMMA.split(fusionDescriptorString); + else if (fusionDescriptorString.split(",").length >1){ + fusionDescriptors = fusionDescriptorString.split(","); } else{ throw new StructureBuildingException("Invalid fusion descriptor: " + fusionDescriptorString); @@ -458,7 +456,7 @@ String fusionDescriptor = fusionDescriptors[j]; Fragment component = multiplier>1 ? fusionComponents.get(j) : nextComponent; Fragment parentToUse = previousFusionLevelFragments.get(j); - boolean simpleFusion = MATCH_COLON.split(fusionDescriptor).length <= 1; + boolean simpleFusion = fusionDescriptor.split(":").length <= 1; if (simpleFusion){ String[] fusionArray = determineNumericalAndLetterComponents(fusionDescriptor); if (fusionArray[1].length() != 0){ @@ -497,7 +495,7 @@ * @return */ private String[] determineNumericalAndLetterComponents(String fusionDescriptor) { - String[] fusionArray = MATCH_DASH.split(fusionDescriptor); + String[] fusionArray = fusionDescriptor.split("-"); if (fusionArray.length ==2){ return fusionArray; } @@ -521,16 +519,17 @@ * @throws StructureBuildingException */ private void processBenzoFusions() throws StructureBuildingException { - for(int i= groupsInFusedRing.size() -2;i >=0; i--) { - if (groupsInFusedRing.get(i).getValue().equals("benz") || groupsInFusedRing.get(i).getValue().equals("benzo")){ - Element possibleFusionbracket = (Element) XOMTools.getNextSibling(groupsInFusedRing.get(i)); - if 
(!possibleFusionbracket.getLocalName().equals(FUSION_EL)){ - Element possibleMultiplier = (Element) XOMTools.getPreviousSibling(groupsInFusedRing.get(i)); - if (possibleMultiplier==null || !possibleMultiplier.getLocalName().equals(MULTIPLIER_EL)|| possibleMultiplier.getAttributeValue(TYPE_ATR).equals(GROUP_TYPE_VAL)){ + for(int i = groupsInFusedRing.size() - 2; i >= 0; i--) { + Element group = groupsInFusedRing.get(i); + if (group.getValue().equals("benz") || group.getValue().equals("benzo")) { + Element possibleFusionbracket = OpsinTools.getNextSibling(group); + if (!possibleFusionbracket.getName().equals(FUSION_EL)) { + Element possibleMultiplier = OpsinTools.getPreviousSibling(group); + if (possibleMultiplier == null || !possibleMultiplier.getName().equals(MULTIPLIER_EL) || possibleMultiplier.getAttributeValue(TYPE_ATR).equals(GROUP_TYPE_VAL)) { //e.g. 2-benzofuran. Fused rings of this type are a special case treated as being a single component //and have a special convention for indicating the position of heteroatoms - benzoSpecificFusion(groupsInFusedRing.get(i), groupsInFusedRing.get(i+1)); - groupsInFusedRing.get(i).detach(); + benzoSpecificFusion(group, groupsInFusedRing.get(i + 1)); + group.detach(); groupsInFusedRing.remove(i); } } @@ -564,9 +563,9 @@ List numericalLocantsOfChild = null; List letterLocantsOfParent = null; if (fusionDescriptor != null){ - String[] fusionArray = MATCH_DASH.split(fusionDescriptor); + String[] fusionArray = fusionDescriptor.split("-"); if (fusionArray.length ==2){ - numericalLocantsOfChild = Arrays.asList(MATCH_COMMA.split(fusionArray[0])); + numericalLocantsOfChild = Arrays.asList(fusionArray[0].split(",")); char[] tempLetterLocantsOfParent = fusionArray[1].toCharArray(); letterLocantsOfParent = new ArrayList(); for (char letterLocantOfParent : tempLetterLocantsOfParent) { @@ -575,7 +574,7 @@ } else{ if (fusionArray[0].contains(",")){//only has digits - String[] numericalLocantsOfChildTemp = 
MATCH_COMMA.split(fusionArray[0]); + String[] numericalLocantsOfChildTemp = fusionArray[0].split(","); numericalLocantsOfChild = Arrays.asList(numericalLocantsOfChildTemp); } else{//only has letters @@ -622,34 +621,32 @@ * @return */ private List findPossibleLetterLocants(Fragment ring, int edgeLength) { - List atomlist = ring.getAtomList(); - List letterLocantsOfParent = null; - List carbonAtoms = new ArrayList(); - atomlist.add(0, atomlist.get(atomlist.size()-1));//this atomList is a copy so we can safely do this - for (int i =atomlist.size() -1; i >=0; i--) {//iterate backwards in list to use highest locanted edge in preference. + List carbonAtomIndexes = new ArrayList(); + int numberOfAtoms = ring.getAtomCount(); + CyclicAtomList cyclicAtomList = new CyclicAtomList(ring.getAtomList()); + for (int i = 0; i <= numberOfAtoms; i++) { + //iterate backwards in list to use highest locanted edge in preference. //this retains what is currently locant 1 on the parent ring as locant 1 if the first two atoms found match - Atom atom = atomlist.get(i); - if (atom.getElement().equals("C")){ - if (atom.getIncomingValency()>=3){ - carbonAtoms.clear(); - continue;//don't want bridgehead carbons - } - carbonAtoms.add(atom); - if (carbonAtoms.size() ==edgeLength +1 ){//as many in a row as edgelength ->use this side - letterLocantsOfParent = new ArrayList(); - Collections.reverse(carbonAtoms); - atomlist.remove(0); + //the last atom in the list is potentially tested twice e.g. on a 6 membered ring, 6-5 and 1-6 are both possible + Atom atom = cyclicAtomList.previous(); + //want non-bridgehead carbon atoms. Double-check that these carbon atoms are actually bonded (e.g. von baeyer systems have non-consecutive atom numbering!) 
+ if (atom.getElement() == ChemEl.C && atom.getBondCount() == 2 + && (carbonAtomIndexes.size() == 0 || atom.getAtomNeighbours().contains(cyclicAtomList.peekNext()))){ + carbonAtomIndexes.add(cyclicAtomList.getIndex()); + if (carbonAtomIndexes.size() == edgeLength + 1){//as many carbons in a row as to give that edgelength ->use these side/s + Collections.reverse(carbonAtomIndexes); + List letterLocantsOfParent = new ArrayList(); for (int j = 0; j < edgeLength; j++) { - letterLocantsOfParent.add(String.valueOf((char)(97 +atomlist.indexOf(carbonAtoms.get(j)))));//97 is ascii for a + letterLocantsOfParent.add(String.valueOf((char)(97 + carbonAtomIndexes.get(j))));//97 is ascii for a } - break; + return letterLocantsOfParent; } } else{ - carbonAtoms.clear(); + carbonAtomIndexes.clear(); } } - return letterLocantsOfParent; + return null; } /** @@ -661,30 +658,29 @@ * @return */ private List findPossibleNumericalLocants(Fragment ring, int edgeLength) { - List atomlist = ring.getAtomList(); - List numericalLocantsOfChild = null; List carbonLocants = new ArrayList(); - atomlist.add(atomlist.get(0));//this atomList is a copy so we can safely do this - for (Atom atom : atomlist) { - if (atom.getElement().equals("C")){ - if (atom.getIncomingValency()>=3){ - carbonLocants.clear(); - continue;//don't want bridgehead carbons - } + int numberOfAtoms = ring.getAtomCount(); + CyclicAtomList cyclicAtomList = new CyclicAtomList(ring.getAtomList()); + for (int i = 0; i <= numberOfAtoms; i++) { + //the last atom in the list is potentially tested twice e.g. on a 6 membered ring, 1-2 and 6-1 are both possible + Atom atom = cyclicAtomList.next(); + //want non-bridgehead carbon atoms. Double-check that these carbon atoms are actually bonded (e.g. von baeyer systems have non-consecutive atom numbering!) 
+ if (atom.getElement() == ChemEl.C && atom.getBondCount() == 2 + && (carbonLocants.size() == 0 || atom.getAtomNeighbours().contains(cyclicAtomList.peekPrevious()))){ carbonLocants.add(atom.getFirstLocant()); - if (carbonLocants.size()==edgeLength +1){//as many in a row as edgelength ->use this side - numericalLocantsOfChild = new ArrayList(); + if (carbonLocants.size() == edgeLength + 1){//as many carbons in a row as to give that edgelength ->use these side/s + List numericalLocantsOfChild = new ArrayList(); for (String locant : carbonLocants) { numericalLocantsOfChild.add(locant); } - break; + return numericalLocantsOfChild; } } else{ carbonLocants.clear(); } } - return numericalLocantsOfChild; + return null; } /** @@ -706,7 +702,7 @@ CyclicAtomList cyclicListAtomsOnSurfaceOfParent = new CyclicAtomList(parentPeripheralAtomList, (int)letterLocantsOfParent.get(0).charAt(0) -97);//convert from lower case character through ascii to 0-23 parentAtoms.add(cyclicListAtomsOnSurfaceOfParent.getCurrent()); for (int i = 0; i < letterLocantsOfParent.size(); i++) { - parentAtoms.add(cyclicListAtomsOnSurfaceOfParent.getNext()); + parentAtoms.add(cyclicListAtomsOnSurfaceOfParent.next()); } fuseRings(childAtoms, parentAtoms); } @@ -747,10 +743,10 @@ private void performHigherOrderFusion(String fusionDescriptor, Fragment nextComponent, Fragment fusedRing) throws StructureBuildingException { List numericalLocantsOfChild = null; List numericalLocantsOfParent = null; - String[] fusionArray = MATCH_COLON.split(fusionDescriptor); + String[] fusionArray = fusionDescriptor.split(":"); if (fusionArray.length ==2){ - numericalLocantsOfChild = Arrays.asList(MATCH_COMMA.split(fusionArray[0])); - numericalLocantsOfParent = Arrays.asList(MATCH_COMMA.split(fusionArray[1])); + numericalLocantsOfChild = Arrays.asList(fusionArray[0].split(",")); + numericalLocantsOfParent = Arrays.asList(fusionArray[1].split(",")); } else{ throw new StructureBuildingException("Malformed fusion bracket: This is an 
OPSIN bug, check regexTokens.xml"); @@ -798,8 +794,8 @@ List potentialFusionAtomsAscending = new ArrayList(); potentialFusionAtomsAscending.add(cyclicRingAtomList.getCurrent()); - while (cyclicRingAtomList.getIndice() != indexfinal){//assume numbers are ascending - potentialFusionAtomsAscending.add(cyclicRingAtomList.getNext()); + while (cyclicRingAtomList.getIndex() != indexfinal){//assume numbers are ascending + potentialFusionAtomsAscending.add(cyclicRingAtomList.next()); } if (expectedNumberOfAtomsToBeUsedForFusion ==null ||expectedNumberOfAtomsToBeUsedForFusion == potentialFusionAtomsAscending.size()){ boolean notInPotentialParentAtoms =false; @@ -814,11 +810,11 @@ } if (fusionAtoms ==null || expectedNumberOfAtomsToBeUsedForFusion ==null){//that didn't work, so try assuming the numbers are descending - cyclicRingAtomList.setIndice(indexfirst); + cyclicRingAtomList.setIndex(indexfirst); List potentialFusionAtomsDescending = new ArrayList(); potentialFusionAtomsDescending.add(cyclicRingAtomList.getCurrent()); - while (cyclicRingAtomList.getIndice() != indexfinal){//assume numbers are descending - potentialFusionAtomsDescending.add(cyclicRingAtomList.getPrevious()); + while (cyclicRingAtomList.getIndex() != indexfinal){//assume numbers are descending + potentialFusionAtomsDescending.add(cyclicRingAtomList.previous()); } if (expectedNumberOfAtomsToBeUsedForFusion ==null || expectedNumberOfAtomsToBeUsedForFusion == potentialFusionAtomsDescending.size()){ boolean notInPotentialParentAtoms =false; @@ -872,7 +868,7 @@ if (childAtom.hasSpareValency()){ parentAtom.setSpareValency(true); } - if (!parentAtom.getElement().equals(childAtom.getElement())){ + if (parentAtom.getElement() != childAtom.getElement()){ throw new StructureBuildingException("Invalid fusion descriptor: Heteroatom placement is ambiguous as it is not present in both components of the fusion"); } atomsToRemoveToReplacementAtom.put(childAtom, parentAtom); @@ -945,50 +941,51 @@ * @throws 
StructureBuildingException */ private void benzoSpecificFusion(Element benzoEl, Element parentEl) throws StructureBuildingException { - /* * Perform the fusion, number it and associate it with the parentEl */ - Fragment benzoRing = state.xmlFragmentMap.get(benzoEl); - Fragment parentRing = state.xmlFragmentMap.get(parentEl); + Fragment benzoRing = benzoEl.getFrag(); + Fragment parentRing = parentEl.getFrag(); performSimpleFusion(null, benzoRing , parentRing); state.fragManager.incorporateFragment(benzoRing, parentRing); removeMergedAtoms(); FusedRingNumberer.numberFusedRing(parentRing);//numbers the fused ring; Fragment fusedRing =parentRing; + setBenzoHeteroatomPositioning(benzoEl, fusedRing); + } - /* - * Check for locants and use these to set the heteroatom positions - */ - Element locantEl = (Element) XOMTools.getPreviousSibling(benzoEl); - if (locantEl != null && locantEl.getLocalName().equals(LOCANT_EL)) { - String[] locants = MATCH_COMMA.split(locantEl.getValue()); - Elements suffixes=((Element)benzoEl.getParent()).getChildElements(SUFFIX_EL); - int suffixesWithoutLocants =0; - for (int i = 0; i < suffixes.size(); i++) { - if (suffixes.get(i).getAttribute(LOCANT_ATR)==null){ - suffixesWithoutLocants++; - } - } - if (locants.length != suffixesWithoutLocants){//In preference locants will be assigned to suffixes rather than to this nomenclature + /** + * Checks for locant(s) before benzo and uses these to set + * @param benzoEl + * @param fusedRing + * @throws StructureBuildingException + */ + private void setBenzoHeteroatomPositioning(Element benzoEl, Fragment fusedRing) throws StructureBuildingException { + Element locantEl = OpsinTools.getPreviousSibling(benzoEl); + if (locantEl != null && locantEl.getName().equals(LOCANT_EL)) { + String[] locants = locantEl.getValue().split(","); + if (locantsCouldApplyToHeteroatomPositions(locants, benzoEl)) { List atomList =fusedRing.getAtomList(); - LinkedList heteroatoms =new LinkedList(); - LinkedList 
elementOfHeteroAtom =new LinkedList(); + List heteroatoms = new ArrayList(); + List elementOfHeteroAtom = new ArrayList(); for (Atom atom : atomList) {//this iterates in the same order as the numbering system - if (!atom.getElement().equals("C")){ + if (atom.getElement() != ChemEl.C){ heteroatoms.add(atom); elementOfHeteroAtom.add(atom.getElement()); } } if (locants.length == heteroatoms.size()){//as many locants as there are heteroatoms to assign - for (Atom atom : heteroatoms) { - atom.setElement("C"); - } - for (int i=0; i< heteroatoms.size(); i ++){ - String elementSymbol =elementOfHeteroAtom.get(i); - fusedRing.getAtomByLocantOrThrow(locants[i]).setElement(elementSymbol); + //check for special case of a single locant indicating where the group substitutes e.g. 4-benzofuran-2-yl + if (!(locants.length == 1 && OpsinTools.getPreviousSibling(locantEl) == null + && ComponentProcessor.checkLocantPresentOnPotentialRoot(state, benzoEl.getParent(), locants[0]))) { + for (Atom atom : heteroatoms) { + atom.setElement(ChemEl.C); + } + for (int i=0; i< heteroatoms.size(); i++) { + fusedRing.getAtomByLocantOrThrow(locants[i]).setElement(elementOfHeteroAtom.get(i)); + } + locantEl.detach(); } - locantEl.detach(); } else if (locants.length > 1){ throw new StructureBuildingException("Unable to assign all locants to benzo-fused ring or multiplier was mising"); @@ -996,4 +993,30 @@ } } } + + private boolean locantsCouldApplyToHeteroatomPositions(String[] locants, Element benzoEl) { + if (!locantsAreAllNumeric(locants)) { + return false; + } + List suffixes = benzoEl.getParent().getChildElements(SUFFIX_EL); + int suffixesWithoutLocants = 0; + for (Element suffix : suffixes) { + if (suffix.getAttribute(LOCANT_ATR)==null){ + suffixesWithoutLocants++; + } + } + if (locants.length == suffixesWithoutLocants){//In preference locants will be assigned to suffixes rather than to this nomenclature + return false; + } + return true; + } + + private boolean locantsAreAllNumeric(String[] 
locants) { + for (String locant : locants) { + if (!MATCH_NUMERIC_LOCANT.matcher(locant).matches()){ + return false; + } + } + return true; + } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumberer.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumberer.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumberer.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumberer.java 2017-07-23 20:55:18.000000000 +0000 @@ -3,6 +3,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.EnumMap; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; @@ -18,6 +19,7 @@ * */ class FusedRingNumberer { + private static final Logger LOG = Logger.getLogger(FusedRingNumberer.class); private static class RingConnectivityTable { final List ringShapes = new ArrayList(); @@ -55,13 +57,16 @@ } } - enum FusionRingShape{ + enum FusionRingShape { enterFromLeftHouse,//5 membered ring enterFromTopLeftHouse,//5 membered ring enterFromTopRightHouse,//5 membered ring enterFromRightHouse,//5 membered ring - enterFromLeftSevenMembered,//7 membered ring, modified from 6 membered at bottom - enterFromRightSevenMembered,//7 membered ring, modified from 6 membered at top + enterFromLeftSevenMembered,//7 membered ring + enterFromTopSevenMembered,//7 membered ring + enterFromRightSevenMembered,//7 membered ring + enterFromBottomRightSevenMembered,//7 membered ring + enterFromBottomLeftSevenMembered,//7 membered ring standard } @@ -106,18 +111,18 @@ //Give low numbers for the heteroatoms as a set. 
while(i < sequenceA.size()){ Atom atomA=sequenceA.get(i); - boolean isAaHeteroatom =!atomA.getElement().equals("C"); + boolean isAaHeteroatom = atomA.getElement() != ChemEl.C; //bridgehead carbon do not increment numbering - if (!isAaHeteroatom && atomA.getIncomingValency()>=3){ + if (!isAaHeteroatom && atomA.getBondCount()>=3){ i++; continue; } Atom atomB=sequenceB.get(j); - boolean isBaHeteroatom =!atomB.getElement().equals("C"); - if (!isBaHeteroatom && atomB.getIncomingValency()>=3){ + boolean isBaHeteroatom =atomB.getElement() != ChemEl.C; + if (!isBaHeteroatom && atomB.getBondCount()>=3){ j++; continue; } @@ -138,30 +143,23 @@ Atom atomA=sequenceA.get(i); //bridgehead carbon do not increment numbering - if (atomA.getElement().equals("C")&& atomA.getIncomingValency()>=3){ + if (atomA.getElement() == ChemEl.C && atomA.getBondCount()>=3){ i++; continue; } Atom atomB=sequenceB.get(j); - if (atomB.getElement().equals("C") && atomB.getIncomingValency()>=3){ + if (atomB.getElement() == ChemEl.C && atomB.getBondCount()>=3){ j++; continue; } - int atomAElementValue, atomBElementValue; - if (heteroAtomValues.containsKey(atomA.getElement())){ - atomAElementValue = heteroAtomValues.get(atomA.getElement()); - } - else{ - atomAElementValue=0; - } - if (heteroAtomValues.containsKey(atomB.getElement())){ - atomBElementValue = heteroAtomValues.get(atomB.getElement()); - } - else{ - atomBElementValue=0; - } + Integer heteroAtomPriorityA = heteroAtomValues.get(atomA.getElement()); + int atomAElementValue = heteroAtomPriorityA != null ? heteroAtomPriorityA : 0; + + Integer heteroAtomPriorityB = heteroAtomValues.get(atomB.getElement()); + int atomBElementValue = heteroAtomPriorityB != null ? 
heteroAtomPriorityB : 0; + if (atomAElementValue > atomBElementValue){ return -1; } @@ -175,13 +173,13 @@ for ( i = 0; i < sequenceA.size(); i++) { Atom atomA=sequenceA.get(i); Atom atomB=sequenceB.get(i); - if (atomA.getIncomingValency()>=3 && atomA.getElement().equals("C")){ - if (!(atomB.getIncomingValency()>=3 && atomB.getElement().equals("C"))){ + if (atomA.getBondCount()>=3 && atomA.getElement() == ChemEl.C){ + if (!(atomB.getBondCount()>=3 && atomB.getElement() == ChemEl.C)){ return -1; } } - if (atomB.getIncomingValency()>=3 && atomB.getElement().equals("C")){ - if (!(atomA.getIncomingValency()>=3 && atomA.getElement().equals("C"))){ + if (atomB.getBondCount()>=3 && atomB.getElement() == ChemEl.C){ + if (!(atomA.getBondCount()>=3 && atomA.getElement() == ChemEl.C)){ return 1; } } @@ -193,13 +191,13 @@ for (i = 0; i < sequenceA.size(); i++) { Atom atomA=sequenceA.get(i); Atom atomB=sequenceB.get(i); - if (atomA.getIncomingValency()>=3){ - if (!(atomB.getIncomingValency()>=3)){ + if (atomA.getBondCount()>=3){ + if (!(atomB.getBondCount()>=3)){ return -1; } } - if (atomB.getIncomingValency()>=3){ - if (!(atomA.getIncomingValency()>=3)){ + if (atomB.getBondCount()>=3){ + if (!(atomA.getBondCount()>=3)){ return 1; } } @@ -209,32 +207,32 @@ } } - private static final HashMap heteroAtomValues =new HashMap(); + private static final Map heteroAtomValues = new EnumMap(ChemEl.class); static{ //unknown heteroatoms or carbon are given a value of 0 - heteroAtomValues.put("Hg",2); - heteroAtomValues.put("Tl",3); - heteroAtomValues.put("In",4); - heteroAtomValues.put("Ga",5); - heteroAtomValues.put("Al",6); - heteroAtomValues.put("B",7); - heteroAtomValues.put("Pb",8); - heteroAtomValues.put("Sn",9); - heteroAtomValues.put("Ge",10); - heteroAtomValues.put("Si",11); - heteroAtomValues.put("Bi",12); - heteroAtomValues.put("Sb",13); - heteroAtomValues.put("As",14); - heteroAtomValues.put("P",15); - heteroAtomValues.put("N",16); - heteroAtomValues.put("Te",17); - 
heteroAtomValues.put("Se",18); - heteroAtomValues.put("S",19); - heteroAtomValues.put("O",20); - heteroAtomValues.put("I",21); - heteroAtomValues.put("Br",22); - heteroAtomValues.put("Cl",23); - heteroAtomValues.put("F",24); + heteroAtomValues.put(ChemEl.Hg, 2); + heteroAtomValues.put(ChemEl.Tl, 3); + heteroAtomValues.put(ChemEl.In, 4); + heteroAtomValues.put(ChemEl.Ga, 5); + heteroAtomValues.put(ChemEl.Al, 6); + heteroAtomValues.put(ChemEl.B, 7); + heteroAtomValues.put(ChemEl.Pb, 8); + heteroAtomValues.put(ChemEl.Sn, 9); + heteroAtomValues.put(ChemEl.Ge, 10); + heteroAtomValues.put(ChemEl.Si, 11); + heteroAtomValues.put(ChemEl.Bi, 12); + heteroAtomValues.put(ChemEl.Sb, 13); + heteroAtomValues.put(ChemEl.As, 14); + heteroAtomValues.put(ChemEl.P, 15); + heteroAtomValues.put(ChemEl.N, 16); + heteroAtomValues.put(ChemEl.Te, 17); + heteroAtomValues.put(ChemEl.Se, 18); + heteroAtomValues.put(ChemEl.S, 19); + heteroAtomValues.put(ChemEl.O, 20); + heteroAtomValues.put(ChemEl.I, 21); + heteroAtomValues.put(ChemEl.Br, 22); + heteroAtomValues.put(ChemEl.Cl, 23); + heteroAtomValues.put(ChemEl.F, 24); } /* * The meaning of the integers used is as follows: @@ -252,18 +250,19 @@ /** * Numbers the fused ring - * Currently only works for a very limited selection of rings + * Works reliably for all common ring systems. 
+ * Some complex fused ring systems involving multiple connections to rings with an odd number of edges may still be wrong * @param fusedRing * @throws StructureBuildingException */ static void numberFusedRing(Fragment fusedRing) throws StructureBuildingException { List rings = SSSRFinder.getSetOfSmallestRings(fusedRing); - if (rings.size() <2){ + if (rings.size() <2) { throw new StructureBuildingException("Ring perception system found less than 2 rings within input fragment!"); } List atomList = fusedRing.getAtomList(); setupAdjacentFusedRingProperties(rings); - if (!checkRingApplicability(rings)){ + if (!checkRingApplicability(rings)) { for (Atom atom : atomList) { atom.clearLocants(); } @@ -286,41 +285,25 @@ } } // find the preferred numbering scheme then relabel with this scheme - Collections.sort( atomSequences, new SortAtomSequences()); - fusedRing.setDefaultInAtom(atomSequences.get(0).get(0)); - FragmentTools.relabelFusedRingSystem(atomSequences.get(0)); + Collections.sort(atomSequences, new SortAtomSequences()); + FragmentTools.relabelLocantsAsFusedRingSystem(atomSequences.get(0)); fusedRing.reorderAtomCollection(atomSequences.get(0)); } /** - * Calculates the number of fused bonds each ring is involved in and - * notes which fused rings are adjacent to each other + * Populates rings with their neighbouring fused rings and the bonds involved * @param rings */ static void setupAdjacentFusedRingProperties(List rings){ - for (Ring curRing : rings) { - for(Bond bond : curRing.getBondList()) { - bond.getFusedRings().clear(); - } - } - for (Ring curRing : rings) { - for(Bond bond : curRing.getBondList()) { // go through all the bonds for the current ring - if (bond.getFusedRings().size()>=2){ // Bond can't be involved in more than 2 rings, hence already analysed so skip it - continue; - } - - for (Ring ring : rings) { // check if this bond belongs to any other ring - if (curRing != ring) { - if (ring.getBondList().contains(bond)) { - bond.addFusedRing(ring); // 
if so, then add the rings into fusedRing array in the bond - bond.addFusedRing(curRing); - - ring.incrementNumberOfFusedBonds(); // and increment the number of fused bonds the ring is involved in - curRing.incrementNumberOfFusedBonds(); - - ring.addNeighbour(curRing); // and note that the rings are neighbours of each other - curRing.addNeighbour(ring); - } + for (int i = 0, l = rings.size(); i < l; i++) { + Ring curRing = rings.get(i); + bondLoop : for (Bond bond : curRing.getBondList()) { // go through all the bonds for the current ring + for (int j = i + 1; j < l; j++) { + Ring otherRing = rings.get(j); + if (otherRing.getBondList().contains(bond)) { // check if this bond belongs to any other ring + otherRing.addNeighbour(bond, curRing); + curRing.addNeighbour(bond, otherRing); // if so, then associate the bond with the adjacent ring + continue bondLoop; } } } @@ -414,32 +397,31 @@ * @param ct * @param cts * @return - * @throws StructureBuildingException */ - private static List buildRingConnectionTables(Ring currentRing, Ring previousRing, int previousDir, Bond previousBond, Atom atom, RingConnectivityTable ct, List cts) throws StructureBuildingException { + private static List buildRingConnectionTables(Ring currentRing, Ring previousRing, int previousDir, Bond previousBond, Atom atom, RingConnectivityTable ct, List cts) { // order atoms and bonds in the ring currentRing.makeCyclicLists(previousBond, atom); List generatedCts = new ArrayList(); List allowedShapes = getAllowedShapesForRing(currentRing, previousBond); - if (allowedShapes.size()==0){ + if (allowedShapes.size() == 0) { throw new RuntimeException("OPSIN limitation, unsupported ring size in fused ring numbering"); } ct.usedRings.add(currentRing); - for (int i = allowedShapes.size()-1; i >=0; i--) { + for (int i = allowedShapes.size() - 1; i >=0; i--) { FusionRingShape fusionRingShape = allowedShapes.get(i); RingConnectivityTable currentCT; - if (i==0){ + if (i==0) { currentCT = ct; } else{ - currentCT 
=ct.copy(); + currentCT = ct.copy(); cts.add(currentCT); generatedCts.add(currentCT); } RingShape ringShape = new RingShape(currentRing, fusionRingShape); List ctsToExpand = new ArrayList(); ctsToExpand.add(currentCT);//all the cts to consider, the currentCT and generated clones - for (Ring neighbourRing : currentRing.getNeighbours()){ + for (Ring neighbourRing : currentRing.getNeighbours()) { //find the directions between the current ring and all neighbouring rings including the previous ring // this means that the direction to the previous ring will then be known in both directions @@ -512,7 +494,7 @@ else if (!distances.contains(3)){ allowedRingShapes.add(FusionRingShape.enterFromTopRightHouse); } - allowedRingShapes = removeDegenerateRingShapes(allowedRingShapes, distances); + allowedRingShapes = removeDegenerateRingShapes(allowedRingShapes, distances, 5); } else if (fusedBondCount==5){ allowedRingShapes.add(FusionRingShape.enterFromLeftHouse); @@ -528,8 +510,26 @@ allowedRingShapes.add(FusionRingShape.enterFromLeftSevenMembered); } else{ - allowedRingShapes.add(FusionRingShape.enterFromLeftSevenMembered); - allowedRingShapes.add(FusionRingShape.enterFromRightSevenMembered); + List distances = new ArrayList();//one distance is likely to be 0 + for (Bond fusedBond : fusedBonds) { + distances.add(calculateDistanceBetweenBonds(startingBond, fusedBond, ring)); + } + if (!distances.contains(4) && !distances.contains(6)){ + allowedRingShapes.add(FusionRingShape.enterFromLeftSevenMembered); + } + if (!distances.contains(1) && !distances.contains(6)){ + allowedRingShapes.add(FusionRingShape.enterFromTopSevenMembered); + } + if (!distances.contains(1) && !distances.contains(3)){ + allowedRingShapes.add(FusionRingShape.enterFromRightSevenMembered); + } + if (!distances.contains(2) && !distances.contains(4)){ + allowedRingShapes.add(FusionRingShape.enterFromBottomRightSevenMembered); + } + if (!distances.contains(3) && !distances.contains(5)){ + 
allowedRingShapes.add(FusionRingShape.enterFromBottomLeftSevenMembered); + } + allowedRingShapes = removeDegenerateRingShapes(allowedRingShapes, distances, 7); } } else{ @@ -542,8 +542,9 @@ * Removes the ring shapes that for given distances have identical properties * @param allowedRingShapes * @param distances + * @param ringSize */ - private static List removeDegenerateRingShapes(List allowedRingShapes, List distances) { + private static List removeDegenerateRingShapes(List allowedRingShapes, List distances, int ringSize) { distances = new ArrayList(distances); distances.remove((Integer)0);//remove distance 0 if present, this invariably comes from the starting bond and is not of interest (and breaks getDirectionFromDist) for (int i = allowedRingShapes.size() - 1; i >=0; i--) { @@ -552,7 +553,7 @@ FusionRingShape shapeToCompareWith = allowedRingShapes.get(j); boolean foundDifference = false; for (Integer distance : distances) { - if (getDirectionFromDist(shapeToConsiderRemoving, 5, distance) != getDirectionFromDist(shapeToCompareWith, 5, distance)){ + if (getDirectionFromDist(shapeToConsiderRemoving, ringSize, distance) != getDirectionFromDist(shapeToCompareWith, ringSize, distance)){ foundDifference = true; break; } @@ -574,9 +575,8 @@ * @param currentBond * @param previousDir * @return - * @throws StructureBuildingException */ - private static int calculateRingDirection(RingShape ringShape, Bond previousBond, Bond currentBond, int previousDir) throws StructureBuildingException{ + private static int calculateRingDirection(RingShape ringShape, Bond previousBond, Bond currentBond, int previousDir) { // take the ring fused to one from the previous loop step Ring ring = ringShape.getRing(); if (ring.getCyclicBondList() == null ) { @@ -613,10 +613,10 @@ } /** - * Uses the ring size, the ring shape and distance between the incoming and outgoing fused bond to determine + * Uses the ring shape, the ring size and distance between the incoming and outgoing fused bond to 
determine * the relative direction between the entry point on the ring and the exit point * @param fusionRingShape - * @param ringShape + * @param ringSize * @param dist * @return */ @@ -632,19 +632,20 @@ else throw new RuntimeException("Impossible distance between bonds for a 3 membered ring"); } else if (ringSize == 4) { // 4 member ring - if (dist == 2) { - dir = 0; - } - else if (dist ==1) { + if (dist ==1) { dir = -2; } + else if (dist == 2) { + dir = 0; + } else if (dist ==3) { dir = 2; } else throw new RuntimeException("Impossible distance between bonds for a 4 membered ring"); } else if (ringSize == 5) { // 5 member ring - if (fusionRingShape == FusionRingShape.enterFromLeftHouse){ + switch (fusionRingShape) { + case enterFromLeftHouse: if (dist ==1){ dir = -2;//fusion to an elongated bond } @@ -657,9 +658,11 @@ else if (dist ==4){ dir = 3; } - else throw new RuntimeException("Impossible distance between bonds for a 5 membered ring"); - } - else if (fusionRingShape == FusionRingShape.enterFromTopLeftHouse){ + else { + throw new RuntimeException("Impossible distance between bonds for a 5 membered ring"); + } + break; + case enterFromTopLeftHouse: if (dist ==1){ dir = -3; } @@ -672,9 +675,11 @@ else if (dist ==4){ dir = 3; } - else throw new RuntimeException("Impossible distance between bonds for a 5 membered ring"); - } - else if (fusionRingShape == FusionRingShape.enterFromTopRightHouse){ + else { + throw new RuntimeException("Impossible distance between bonds for a 5 membered ring"); + } + break; + case enterFromTopRightHouse: if (dist ==1){ dir = -3; } @@ -687,9 +692,11 @@ else if (dist ==4){ dir = 3; } - else throw new RuntimeException("Impossible distance between bonds for a 5 membered ring"); - } - else if (fusionRingShape == FusionRingShape.enterFromRightHouse){ + else { + throw new RuntimeException("Impossible distance between bonds for a 5 membered ring"); + } + break; + case enterFromRightHouse: if (dist ==1){ dir = -3; } @@ -702,14 +709,63 @@ else 
if (dist ==4){ dir = 2;//fusion to an elongated bond } - else throw new RuntimeException("Impossible distance between bonds for a 5 membered ring"); - } - else{ + else { + throw new RuntimeException("Impossible distance between bonds for a 5 membered ring"); + } + break; + default : throw new RuntimeException("OPSIN Bug: Unrecognised fusion ring shape for 5 membered ring"); } } else if (ringSize == 7) { // 7 member ring - if (fusionRingShape == FusionRingShape.enterFromLeftSevenMembered){ + switch (fusionRingShape) { + case enterFromLeftSevenMembered: + if (dist ==1){ + dir = -3; + } + else if (dist ==2){ + dir = -1; + } + else if (dist ==3){ + dir = 0; + } + else if (dist ==4){ + dir = 1;//fusion to an abnormally angled bond + } + else if (dist ==5){ + dir = 2; + } + else if (dist ==6){ + dir = 3;//fusion to an abnormally angled bond + } + else { + throw new RuntimeException("Impossible distance between bonds for a 7 membered ring"); + } + break; + case enterFromTopSevenMembered: + if (dist ==1){ + dir = -3;//fusion to an abnormally angled bond + } + else if (dist ==2){ + dir = -2; + } + else if (dist ==3){ + dir = -1; + } + else if (dist ==4){ + dir = 1; + } + else if (dist ==5){ + dir = 2; + } + else if (dist ==6){ + dir = 3;//fusion to an abnormally angled bond + } + else { + throw new RuntimeException("Impossible distance between bonds for a 7 membered ring"); + } + break; + case enterFromRightSevenMembered: if (dist ==1){ dir = -3;//fusion to an abnormally angled bond } @@ -728,9 +784,34 @@ else if (dist ==6){ dir = 3; } - else throw new RuntimeException("Impossible distance between bonds for a 7 membered ring"); - } - else if (fusionRingShape == FusionRingShape.enterFromRightSevenMembered){ + else { + throw new RuntimeException("Impossible distance between bonds for a 7 membered ring"); + } + break; + case enterFromBottomRightSevenMembered: + if (dist ==1){ + dir = -3; + } + else if (dist ==2){ + dir = -2;//fusion to an abnormally angled bond + } + else if 
(dist ==3){ + dir = -1; + } + else if (dist ==4){ + dir = 0;//fusion to an abnormally angled bond + } + else if (dist ==5){ + dir = 1; + } + else if (dist ==6){ + dir = 3; + } + else { + throw new RuntimeException("Impossible distance between bonds for a 7 membered ring"); + } + break; + case enterFromBottomLeftSevenMembered: if (dist ==1){ dir = -3; } @@ -738,20 +819,22 @@ dir = -1; } else if (dist ==3){ - dir = 0; + dir = 0;//fusion to an abnormally angled bond } else if (dist ==4){ - dir = 1;//fusion to an abnormally angled bond + dir = 1; } else if (dist ==5){ - dir = 2; + dir = 2;//fusion to an abnormally angled bond } else if (dist ==6){ - dir = 3;//fusion to an abnormally angled bond + dir = 3; } - else throw new RuntimeException("Impossible distance between bonds for a 7 membered ring"); - } - else{ + else { + throw new RuntimeException("Impossible distance between bonds for a 7 membered ring"); + } + break; + default: throw new RuntimeException("OPSIN Bug: Unrecognised fusion ring shape for 7 membered ring"); } } @@ -825,7 +908,7 @@ } /** - * Given a list of cts find the longest chain of rings in a line. This can be used a possible horizontal row + * Given a list of cts find the longest chain of rings in a line. 
This can be used to find a possible horizontal row * The output is a map between the connection tables and the directions which give the longest chains * Some cts may have no directions that give a chain of rings of this length * @@ -839,10 +922,11 @@ if (ct.ringShapes.size() != ct.neighbouringRings.size() || ct.neighbouringRings.size() != ct.directionFromRingToNeighbouringRing.size()) { throw new RuntimeException("OPSIN Bug: Sizes of arrays in fused ring numbering connection table are not equal"); } - int ctEntriesSize =ct.ringShapes.size(); - List directions = new ArrayList(); + int ctEntriesSize = ct.ringShapes.size(); + List directions = new ArrayList(); horizonalRowDirections.put(ct, directions); - for (int i=0; i< ctEntriesSize; i++){ + + for (int i = 0; i < ctEntriesSize; i++) { Ring neighbour = ct.neighbouringRings.get(i); int curChain = 1; int curDir = ct.directionFromRingToNeighbouringRing.get(i); @@ -851,7 +935,7 @@ int indexOfNeighbour = indexOfCorrespondingRingshape(ct.ringShapes, neighbour); if (indexOfNeighbour >= 0) { - for (int j=indexOfNeighbour; j < ctEntriesSize; j++) { + for (int j = indexOfNeighbour; j < ctEntriesSize; j++) { if (ct.ringShapes.get(j).getRing() == neighbour && ct.directionFromRingToNeighbouringRing.get(j) == curDir) { curChain++; neighbour = ct.neighbouringRings.get(j); @@ -987,7 +1071,7 @@ return paths; } - private static Ring[][] generateRingMap(RingConnectivityTable ct, int[] directionFromRingToNeighbouringRing) throws StructureBuildingException { + private static Ring[][] generateRingMap(RingConnectivityTable ct, int[] directionFromRingToNeighbouringRing) { int ctEntriesSize = ct.ringShapes.size(); // Find max and min coordinates for ringMap // we put the first ring into takenRings to start with it in the connection table @@ -1203,18 +1287,19 @@ // Rule B: Maximum number of rings in upper right quadrant. 
Upper right corner candidates (it is not at this stage known which quadrant is the upper right one) double qmax = 0; - for (int c = 0; c < nChains; c++) { + for (Double[] chainQ : chainQs) { for (int j = 0; j < 4; j++) { - if(chainQs.get(c)[j] > qmax) { - qmax = chainQs.get(c)[j]; + Double q = chainQ[j]; + if(q > qmax) { + qmax = q; } } } - for (int c = 0; c < nChains; c++) { + for (Double[] chainQ : chainQs) { List allowedUpperRightQuadrants = new ArrayList(); for (int j = 0; j < 4; j++){ - if (chainQs.get(c)[j] == qmax) { + if (chainQ[j] == qmax) { allowedUpperRightQuadrants.add(j); } } @@ -1359,12 +1444,7 @@ } // next ring - for (Ring ring : nextBond.getFusedRings()) { - if(ring != currentRing) { - nextRing = ring; - break; - } - } + nextRing = currentRing.getNeighbourOfFusedBond(nextBond); int endNumber = currentRing.getBondIndex(nextBond) ; @@ -1419,7 +1499,7 @@ private static boolean isEntirelyFusionAtoms(Ring upperRightRing) { List atomList = upperRightRing.getAtomList(); for (Atom atom : atomList) { - if (atom.getBonds().size() < 3){ + if (atom.getBondCount() < 3){ return false; } } @@ -1537,12 +1617,12 @@ /** * Checks if array contains an object * @param array - * @param c2 + * @param obj * @return */ - private static boolean arrayContains(Object[] array, Object c2) { - for (int i=0; i0){ + if (allBonds.size() > 0){ return allBonds.get(0); } for (Bond bond : tRing.getBondList()) { - if(bond.getFusedRings().size() < 1){ + if(tRing.getNeighbourOfFusedBond(bond) == null){ + // return a non-fused bond return bond; } } @@ -1696,15 +1777,14 @@ else { interimDirection = relativeDirection + previousDir; } - - if (Math.abs(interimDirection)>4) {// Added + if (Math.abs(interimDirection) > 4) {// Added interimDirection = (8 - Math.abs(interimDirection)) * Integer.signum(interimDirection) * -1; } //TODO investigate this function and unit test /* Even numbered rings when angled do not have direction 2. 
* Almost true for 5 member except for corner case where fusion to elongated bond occurs */ - if (Math.abs(interimDirection) == 2 && ((ringSize % 2 ==0) || ringSize==5)) { + if (Math.abs(interimDirection) == 2 && ((ringSize % 2 ==0) || ringSize==5 || ringSize==7)) { // if (one of them equal to 1 and another is equal to 3, we decrease absolute value and conserve the sign) if (Math.abs(relativeDirection)==1 && Math.abs(previousDir)==3 || Math.abs(relativeDirection)==3 && Math.abs(previousDir)==1) { interimDirection = 1 * Integer.signum(interimDirection); diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/GroupingEl.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/GroupingEl.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/GroupingEl.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/GroupingEl.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,121 @@ +package uk.ac.cam.ch.wwmm.opsin; + +import java.util.ArrayList; +import java.util.List; + +class GroupingEl extends Element{ + + private final List children = new ArrayList(); + + GroupingEl(String name) { + super(name); + } + + @Override + void addChild(Element child) { + child.setParent(this); + children.add(child); + } + + @Override + Element copy() { + GroupingEl copy = new GroupingEl(this.name); + for (Element childEl : this.children) { + Element newChild = childEl.copy(); + newChild.setParent(copy); + copy.addChild(newChild); + } + for (int i = 0, len = this.attributes.size(); i < len; i++) { + Attribute atr = this.attributes.get(i); + copy.addAttribute(new Attribute(atr)); + } + return copy; + } + + @Override + Element getChild(int index) { + return children.get(index); + } + + @Override + int getChildCount() { + return children.size(); + } + + @Override + List getChildElements() { + return new ArrayList(children); + } + + @Override + List getChildElements(String name) { + List elements = new 
ArrayList(1); + for (Element element : children) { + if (element.name.equals(name)) { + elements.add(element); + } + } + return elements; + } + + @Override + Element getFirstChildElement(String name) { + for (Element child : children) { + if (child.getName().equals(name)) { + return child; + } + } + return null; + } + + String getValue() { + int childCount = getChildCount(); + if (childCount == 0) { + return ""; + } + StringBuilder result = new StringBuilder(); + for (int i = 0; i < childCount; i++) { + result.append(children.get(i).getValue()); + } + return result.toString(); + } + + @Override + int indexOf(Element child) { + return children.indexOf(child); + } + + @Override + void insertChild(Element child, int index) { + child.setParent(this); + children.add(index, child); + } + + @Override + boolean removeChild(Element child) { + child.setParent(null); + return children.remove(child); + } + + @Override + Element removeChild(int index) { + Element removed = children.remove(index); + removed.setParent(null); + return removed; + } + + @Override + void replaceChild(Element oldChild, Element newChild) { + int index = indexOf(oldChild); + if (index == -1) { + throw new RuntimeException("oldChild is not a child of this element."); + } + removeChild(index); + insertChild(newChild, index); + } + + void setValue(String text) { + throw new UnsupportedOperationException("Token groups do not have a value"); + } + +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IndentingXMLStreamWriter.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IndentingXMLStreamWriter.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IndentingXMLStreamWriter.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IndentingXMLStreamWriter.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,50 @@ +package uk.ac.cam.ch.wwmm.opsin; + +import javax.xml.stream.XMLStreamException; +import 
javax.xml.stream.XMLStreamWriter; + +import org.codehaus.stax2.util.StreamWriterDelegate; + +/** + * This only overrides the commands actually used by the CmlWriter i.e. it isn't general + */ +class IndentingXMLStreamWriter extends StreamWriterDelegate { + + private final int indentSize; + private int depth = 0; + private boolean atStartOfNewline = false; + + IndentingXMLStreamWriter(XMLStreamWriter writer, int indentSize) { + super(writer); + this.indentSize = indentSize; + } + + @Override + public void writeStartElement(String arg0) throws XMLStreamException { + if (!atStartOfNewline){ + super.writeCharacters(OpsinTools.NEWLINE); + } + super.writeCharacters(StringTools.multiplyString(" ", depth * indentSize)); + super.writeStartElement(arg0); + atStartOfNewline = false; + depth++; + } + + @Override + public void writeEndElement() throws XMLStreamException { + depth--; + if (atStartOfNewline) { + super.writeCharacters(StringTools.multiplyString(" ", depth * indentSize)); + } + super.writeEndElement(); + super.writeCharacters(OpsinTools.NEWLINE); + atStartOfNewline = true; + } + + @Override + public void writeCharacters(String arg0) throws XMLStreamException { + super.writeCharacters(arg0); + atStartOfNewline = false; + } + +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IsotopeSpecificationParser.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IsotopeSpecificationParser.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IsotopeSpecificationParser.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/IsotopeSpecificationParser.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,109 @@ +package uk.ac.cam.ch.wwmm.opsin; + +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +class IsotopeSpecificationParser { + + private static final Pattern matchBoughtonIsotope =Pattern.compile("(?:-([^,]+(?:,[^,]+)*))?-d(\\d+)?"); + private static final 
Pattern matchIupacIsotope =Pattern.compile("(?:([^,]+(?:,[^,]+)*)-)?(\\d+)([A-Z][a-z]?)(\\d+)?"); + + static class IsotopeSpecification { + private final ChemEl chemEl; + private final int isotope; + private final int multiplier; + private final String[] locants; + + IsotopeSpecification(ChemEl chemEl, int isotope, int multiplier, String[] locants) { + this.chemEl = chemEl; + this.isotope = isotope; + this.multiplier = multiplier; + this.locants = locants; + } + + ChemEl getChemEl() { + return chemEl; + } + + int getIsotope() { + return isotope; + } + + int getMultiplier() { + return multiplier; + } + + String[] getLocants() { + return locants; + } + } + + static IsotopeSpecification parseIsotopeSpecification(Element isotopeSpecification) throws StructureBuildingException { + String type = isotopeSpecification.getAttributeValue(XmlDeclarations.TYPE_ATR); + if (XmlDeclarations.BOUGHTONSYSTEM_TYPE_VAL.equals(type)) { + return processBoughtonIsotope(isotopeSpecification); + } + else if (XmlDeclarations.IUPACSYSTEM_TYPE_VAL.equals(type)) { + return processIupacIsotope(isotopeSpecification); + } + else { + throw new RuntimeException("Unsupported isotope specification syntax"); + } + } + + private static IsotopeSpecification processBoughtonIsotope(Element isotopeSpecification) throws StructureBuildingException { + String val = isotopeSpecification.getValue(); + Matcher m = matchBoughtonIsotope.matcher(val); + if (!m.matches()) { + throw new RuntimeException("Malformed isotope specification: " + val); + } + ChemEl chemEl = ChemEl.H; + int isotope = 2; + + int multiplier = 1; + String multiplierStr = m.group(2); + if (multiplierStr != null) { + multiplier = Integer.parseInt(multiplierStr); + } + + String locantsStr = m.group(1); + String[] locants = null; + if(locantsStr != null) { + locants = locantsStr.split(","); + if (locants.length != multiplier) { + throw new StructureBuildingException("Mismatch between number of locants: " + locants.length + " and number of hydrogen 
isotopes requested: " + multiplier); + } + } + return new IsotopeSpecification(chemEl, isotope, multiplier, locants); + } + + private static IsotopeSpecification processIupacIsotope(Element isotopeSpecification) throws StructureBuildingException { + String val = isotopeSpecification.getValue(); + Matcher m = matchIupacIsotope.matcher(val); + if (!m.matches()) { + throw new RuntimeException("Malformed isotope specification: " + val); + } + + int isotope = Integer.parseInt(m.group(2)); + ChemEl chemEl = ChemEl.valueOf(m.group(3)); + + int multiplier = 1; + String multiplierStr = m.group(4); + if (multiplierStr != null) { + multiplier = Integer.parseInt(multiplierStr); + } + + String locantsStr = m.group(1); + String[] locants = null; + if(locantsStr != null) { + locants = locantsStr.split(","); + if (multiplierStr == null) { + multiplier = locants.length; + } + else if (locants.length != multiplier) { + throw new StructureBuildingException("Mismatch between number of locants: " + locants.length + " and number of " + chemEl.toString() +" isotopes requested: " + multiplier); + } + } + return new IsotopeSpecification(chemEl, isotope, multiplier, locants); + } +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToStructureConfig.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToStructureConfig.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToStructureConfig.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToStructureConfig.java 2017-07-23 20:55:18.000000000 +0000 @@ -33,7 +33,7 @@ /** * Are radicals allowed? e.g. should fragments such as phenyl be interpretable - * @return + * @return whether radicals are allowed */ public boolean isAllowRadicals() { return allowRadicals; @@ -41,7 +41,6 @@ /** * Sets whether radicals allowed? e.g. 
should fragments such as phenyl be interpretable - * @return */ public void setAllowRadicals(boolean allowRadicals) { this.allowRadicals = allowRadicals; @@ -49,14 +48,14 @@ /** * Are radicals output as wildcard atoms e.g. [*]CC for ethyl - * @return + * @return whether radicals are output using explicit wildcard atoms */ public boolean isOutputRadicalsAsWildCardAtoms() { return outputRadicalsAsWildCardAtoms; } /** - * Should radicals be output as wildcard atoms e.g. [*]CC for ethyl + * Should radicals be output as wildcard atoms e.g. [*]CC for ethyl (as opposed to [CH2]C)
* Note that if this is set to true InChIs cannot be generated * @param outputRadicalsAsWildCardAtoms */ @@ -66,7 +65,7 @@ /** * Should OPSIN attempt reverse parsing to more accurately determine why parsing failed - * @return + * @return whether a more precise cause of failure should be determined if parsing fails */ public boolean isDetailedFailureAnalysis() { return detailedFailureAnalysis; @@ -74,7 +73,6 @@ /** * Sets whether OPSIN should attempt reverse parsing to more accurately determine why parsing failed - * @return */ public void setDetailedFailureAnalysis(boolean detailedFailureAnalysis) { this.detailedFailureAnalysis = detailedFailureAnalysis; @@ -82,7 +80,7 @@ /** * Are acids without the word "acid" interpretable e.g. should "acetic" be interpretable - * @return + * @return whether acids without the word "acid" should be interpretable */ public boolean allowInterpretationOfAcidsWithoutTheWordAcid() { return interpretAcidsWithoutTheWordAcid; @@ -100,7 +98,7 @@ /** * If OPSIN cannot understand the stereochemistry in a name should OPSIN's result be a warning * and structure with incomplete stereochemistry, or should failure be returned (Default) - * @return + * @return whether ignored stereochemistry is a warning (rather than a failure) */ public boolean warnRatherThanFailOnUninterpretableStereochemistry() { return warnRatherThanFailOnUninterpretableStereochemistry; @@ -112,8 +110,7 @@ * and structure with incomplete stereochemistry, or should failure be returned (Default) * @param warnRatherThanFailOnUninterpretableStereochemistry */ - public void setWarnRatherThanFailOnUninterpretableStereochemistry( - boolean warnRatherThanFailOnUninterpretableStereochemistry) { + public void setWarnRatherThanFailOnUninterpretableStereochemistry(boolean warnRatherThanFailOnUninterpretableStereochemistry) { this.warnRatherThanFailOnUninterpretableStereochemistry = warnRatherThanFailOnUninterpretableStereochemistry; } @@ -133,8 +130,7 @@ @Override public 
NameToStructureConfig clone() { try { - NameToStructureConfig copy = (NameToStructureConfig) super.clone(); - return copy; + return (NameToStructureConfig) super.clone(); } catch (CloneNotSupportedException e) { // Can only be thrown if we *don't* implement Cloneable, which we do... throw new Error("Impossible!", e); diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToStructureException.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToStructureException.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToStructureException.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToStructureException.java 2017-07-23 20:55:18.000000000 +0000 @@ -11,22 +11,18 @@ NameToStructureException() { super(); - // TODO Auto-generated constructor stub } NameToStructureException(String message) { super(message); - // TODO Auto-generated constructor stub } NameToStructureException(String message, Throwable cause) { super(message, cause); - // TODO Auto-generated constructor stub } NameToStructureException(Throwable cause) { super(cause); - // TODO Auto-generated constructor stub } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToStructure.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToStructure.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToStructure.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToStructure.java 2017-07-23 20:55:18.000000000 +0000 @@ -13,22 +13,28 @@ import java.lang.reflect.Method; import java.util.Collections; import java.util.List; +import java.util.Properties; + +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; +import 
org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; -import org.apache.commons.cli.OptionBuilder; +import org.apache.commons.cli.Option; +import org.apache.commons.cli.Option.Builder; +import org.apache.commons.io.IOUtils; import org.apache.commons.cli.Options; -import org.apache.commons.cli.ParseException; -import org.apache.commons.cli.PosixParser; import org.apache.commons.cli.UnrecognizedOptionException; import org.apache.log4j.Level; import org.apache.log4j.Logger; -import uk.ac.cam.ch.wwmm.opsin.OpsinResult.OPSIN_RESULT_STATUS; +import com.ctc.wstx.api.WstxOutputProperties; +import com.ctc.wstx.stax.WstxOutputFactory; -import nu.xom.Attribute; -import nu.xom.Element; +import uk.ac.cam.ch.wwmm.opsin.OpsinResult.OPSIN_RESULT_STATUS; /** The "master" class, to turn a name into a structure. * @@ -38,37 +44,52 @@ public class NameToStructure { private static final Logger LOG = Logger.getLogger(NameToStructure.class); - - /**Does finite-state non-destructive parsing on chemical names.*/ - private Parser parser; - /**Applies OPSIN's grammar to tokenise and assign meanings tokens.*/ + /**Applies OPSIN's grammar to tokenise and assign meaning to tokens*/ private ParseRules parseRules; - /** A builder for fragments specified as SMILES */ - private SMILESFragmentBuilder sBuilder; - - /**Constructs a single fragment from the result of the component generation and processing stages.*/ - private StructureBuilder structureBuilder; + /**Parses a chemical name into one (or more in the case of ambiguity) parse trees*/ + private Parser parser; - /**Contains rules on how to interpret suffixes*/ + /**Which suffixes apply to what and what their effects are*/ private SuffixRules suffixRules; private static NameToStructure NTS_INSTANCE; public static synchronized NameToStructure getInstance() { - if (NTS_INSTANCE ==null){ + if (NTS_INSTANCE == null) { NTS_INSTANCE = new NameToStructure(); } return NTS_INSTANCE; } + + /** + * Returns the version of the 
OPSIN library + * @return Version number String + */ + public static String getVersion() { + try { + InputStream is = NameToStructure.class.getResourceAsStream("opsinbuild.props"); + try { + Properties props = new Properties(); + props.load(is); + return props.getProperty("version"); + } + finally { + IOUtils.closeQuietly(is); + } + } + catch (Exception e) { + return null; + } + } /**Initialises the name-to-structure converter. * * @throws NameToStructureException If the converter cannot be initialised, most likely due to bad or missing data files. */ private NameToStructure() { - LOG.info("Initialising OPSIN... "); + LOG.debug("Initialising OPSIN... "); try { /*Initialise all of OPSIN's classes. Some classes are injected as dependencies into subsequent classes*/ @@ -79,15 +100,11 @@ parseRules = new ParseRules(resourceManager); Tokeniser tokeniser = new Tokeniser(parseRules); parser = new Parser(wordRules, tokeniser, resourceManager); - - sBuilder = new SMILESFragmentBuilder(); - structureBuilder = new StructureBuilder(); suffixRules = new SuffixRules(resourceGetter); - } catch (Exception e) { throw new NameToStructureException(e.getMessage(), e); } - LOG.info("OPSIN initialised"); + LOG.debug("OPSIN initialised"); } /** @@ -95,11 +112,11 @@ * @param name The chemical name to parse. * @return A CML element, containing the parsed molecule, or null if the name was uninterpretable. 
*/ - public Element parseToCML(String name) { + public String parseToCML(String name) { OpsinResult result = parseChemicalName(name); - Element cml = result.getCml(); + String cml = result.getCml(); if(cml != null && LOG.isDebugEnabled()){ - LOG.debug(new XOMFormatter().elemToString(result.getCml())); + LOG.debug(cml); } return cml; } @@ -141,64 +158,69 @@ * @return OpsinResult */ public OpsinResult parseChemicalName(String name, NameToStructureConfig n2sConfig) { - n2sConfig = n2sConfig.clone();//avoid n2sconfig being modified mid name processing - if (name==null){ + if (name == null){ throw new IllegalArgumentException("String given for name was null"); } - String message = ""; + n2sConfig = n2sConfig.clone();//avoid n2sconfig being modified mid name processing + + List parses; try { LOG.debug(name); String modifiedName = PreProcessor.preProcess(name); - List parses = parser.parse(n2sConfig, modifiedName); - //if(LOG.isDebugEnabled()) for(Element parse : parses) LOG.debug(new XOMFormatter().elemToString(parse)); + parses = parser.parse(n2sConfig, modifiedName); Collections.sort(parses, new SortParses());//fewer tokens preferred - Fragment fragGeneratedWithWarning = null; - String warningMessage = null; - for(Element parse : parses) { - try { - if (LOG.isDebugEnabled()){ - LOG.debug(new XOMFormatter().elemToString(parse)); - } - new ComponentGenerator(n2sConfig, parse).processParse(); - if (LOG.isDebugEnabled()){ - LOG.debug(new XOMFormatter().elemToString(parse)); - } - BuildState state = new BuildState(n2sConfig, sBuilder); - new ComponentProcessor(suffixRules, state, parse).processParse(); - if (LOG.isDebugEnabled()){ - LOG.debug(new XOMFormatter().elemToString(parse)); - } - Fragment frag = structureBuilder.buildFragment(state, parse); - if (LOG.isDebugEnabled()){ - LOG.debug(new XOMFormatter().elemToString(parse)); - } - if (state.getWarningMessage() == null){ - return new OpsinResult(frag, OPSIN_RESULT_STATUS.SUCCESS, "", name); - } - if 
(fragGeneratedWithWarning == null){ - //record first frag that had a warning but try other parses as they may work without a warning - fragGeneratedWithWarning = frag; - warningMessage = state.getWarningMessage(); - } - } catch (Exception e) { - if (message.length() ==0){ - message = e.getMessage(); - } - if (LOG.isDebugEnabled()){ - LOG.debug(e.getMessage(),e); - } - } - } - if (fragGeneratedWithWarning != null){ - return new OpsinResult(fragGeneratedWithWarning, OPSIN_RESULT_STATUS.WARNING, warningMessage, name); - } } catch (Exception e) { - message += e.getMessage(); if(LOG.isDebugEnabled()) { - LOG.debug(e.getMessage(),e); + LOG.debug(e.getMessage(), e); } + String message = e.getMessage() != null ? e.getMessage() : "exception with null message"; + return new OpsinResult(null, OPSIN_RESULT_STATUS.FAILURE, message, name); } - return new OpsinResult(null, OPSIN_RESULT_STATUS.FAILURE, message, name); + String reasonForFailure = ""; + Fragment fragGeneratedWithWarning = null; + List warnings = Collections.emptyList(); + for(Element parse : parses) { + try { + if (LOG.isDebugEnabled()) { + LOG.debug(parse.toXML()); + } + //Performs XML manipulation e.g. nesting bracketing, processing some nomenclatures + new ComponentGenerator(n2sConfig).processParse(parse); + if (LOG.isDebugEnabled()) { + LOG.debug(parse.toXML()); + } + BuildState state = new BuildState(n2sConfig); + //Converts the XML to fragments (handles many different nomenclatueres for describing structure). Assigns locants + new ComponentProcessor(state, new SuffixApplier(state, suffixRules)).processParse(parse); + if (LOG.isDebugEnabled()) { + LOG.debug(parse.toXML()); + } + //Constructs a single fragment from the fragments generated by the ComponentProcessor. 
Applies stereochemistry + Fragment frag = new StructureBuilder(state).buildFragment(parse); + if (LOG.isDebugEnabled()) { + LOG.debug(parse.toXML()); + } + if (state.getWarnings().size() == 0) { + return new OpsinResult(frag, OPSIN_RESULT_STATUS.SUCCESS, "", name); + } + if (fragGeneratedWithWarning == null) { + //record first frag that had a warning but try other parses as they may work without a warning + fragGeneratedWithWarning = frag; + warnings = state.getWarnings(); + } + } catch (Exception e) { + if (reasonForFailure.length() == 0) { + reasonForFailure = e.getMessage() != null ? e.getMessage() : "exception with null message"; + } + if (LOG.isDebugEnabled()) { + LOG.debug(e.getMessage(), e); + } + } + } + if (fragGeneratedWithWarning != null) { + return new OpsinResult(fragGeneratedWithWarning, OPSIN_RESULT_STATUS.WARNING, warnings, name); + } + return new OpsinResult(null, OPSIN_RESULT_STATUS.FAILURE, reasonForFailure, name); } /** @@ -206,13 +228,18 @@ * This can be used to determine whether a word can be interpreted as being part of a chemical name. * Just because a word can be split into tokens does not mean the word constitutes a valid chemical name * e.g. ester is interpretable but is not in itself a chemical name - * @return - + * @return Opsin parser for recognition/parsing of a chemical word */ public static ParseRules getOpsinParser() { NameToStructure n2s = NameToStructure.getInstance(); return n2s.parseRules; } + + private enum InchiType{ + inchiWithFixedH, + stdInchi, + stdInchiKey + } /**Run OPSIN as a command-line application. * @@ -221,7 +248,7 @@ */ public static void main(String [] args) throws Exception { Options options = buildCommandLineOptions(); - CommandLineParser parser = new PosixParser(); + CommandLineParser parser = new DefaultParser(); CommandLine cmd = null; try{ cmd = parser.parse(options, args); @@ -261,28 +288,35 @@ } System.err.println("Run the jar using the -h flag for help. 
Enter a chemical name to begin:"); - String outputType = cmd.getOptionValue("o", "cml"); - if (outputType.equalsIgnoreCase("cml")){ + String outputType = cmd.getOptionValue("o", "smi"); + if (outputType.equalsIgnoreCase("cml")) { interactiveCmlOutput(input, output, n2sconfig); } - else if (outputType.equalsIgnoreCase("smi") || outputType.equalsIgnoreCase("smiles")){ - interactiveSmilesOutput(input, output, n2sconfig); + else if (outputType.equalsIgnoreCase("smi") || outputType.equalsIgnoreCase("smiles")) { + interactiveSmilesOutput(input, output, n2sconfig, false); + } + else if (outputType.equalsIgnoreCase("inchi")) { + interactiveInchiOutput(input, output, n2sconfig, InchiType.inchiWithFixedH); } - else if (outputType.equalsIgnoreCase("inchi")){ - interactiveInchiOutput(input, output, n2sconfig, false); + else if (outputType.equalsIgnoreCase("stdinchi")) { + interactiveInchiOutput(input, output, n2sconfig, InchiType.stdInchi); } - else if (outputType.equalsIgnoreCase("stdinchi")){ - interactiveInchiOutput(input, output, n2sconfig, true); + else if (outputType.equalsIgnoreCase("stdinchikey")) { + interactiveInchiOutput(input, output, n2sconfig, InchiType.stdInchiKey); + } + else if (outputType.equalsIgnoreCase("extendedsmi") || outputType.equalsIgnoreCase("extendedsmiles") || + outputType.equalsIgnoreCase("cxsmi") || outputType.equalsIgnoreCase("cxsmiles")) { + interactiveSmilesOutput(input, output, n2sconfig, true); } else{ System.err.println("Unrecognised output format: " + outputType); - System.err.println("Expected output types are \"cml\", \"smi\", \"inchi\" and \"stdinchi\""); + System.err.println("Expected output types are \"cml\", \"smi\", \"inchi\", \"stdinchi\" and \"stdinchikey\""); System.exit(1); } - if (unparsedArgs.length == 1){ + if (unparsedArgs.length == 1) { input.close(); } - else if (unparsedArgs.length == 2){ + else if (unparsedArgs.length == 2) { input.close(); output.close(); } @@ -290,24 +324,30 @@ private static void displayUsage(Options 
options) { HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("java -jar opsin-[version]-jar-with-dependencies.jar [options] [inputfile] [outputfile]\n" + - "OPSIN converts systematic chemical names to CML, SMILES or InChI/StdInChI\n" + + String version = getVersion(); + formatter.printHelp("java -jar opsin-" + (version != null ? version : "[version]") + "-jar-with-dependencies.jar [options] [inputfile] [outputfile]" + OpsinTools.NEWLINE + + "OPSIN converts systematic chemical names to CML, SMILES or InChI/StdInChI/StdInChIKey" + OpsinTools.NEWLINE + "Names should be new line delimited and may be read from stdin (default) or a file and output to stdout (default) or a file", options); System.exit(0); } - private static Options buildCommandLineOptions() throws ParseException { + private static Options buildCommandLineOptions() { Options options = new Options(); - OptionBuilder.withArgName("o"); - OptionBuilder.withLongOpt("output"); - OptionBuilder.hasArg(); - OptionBuilder.withDescription("Sets OPSIN's output format (default cml)\n" + - "Allowed values are:\n" + - "cml for Chemical Markup Language\n" + - "smi for SMILES\n" + - "inchi for InChI\n" + - "stdinchi for StdInChI"); - options.addOption(OptionBuilder.create("o")); + Builder outputBuilder = Option.builder("o"); + outputBuilder.longOpt("output"); + outputBuilder.hasArg(); + outputBuilder.argName("format"); + StringBuilder outputOptionsDesc = new StringBuilder(); + outputOptionsDesc.append("Sets OPSIN's output format (default smi)").append(OpsinTools.NEWLINE); + outputOptionsDesc.append("Allowed values are:").append(OpsinTools.NEWLINE); + outputOptionsDesc.append("cml for Chemical Markup Language").append(OpsinTools.NEWLINE); + outputOptionsDesc.append("smi for SMILES").append(OpsinTools.NEWLINE); + outputOptionsDesc.append("extendedsmi for Extended SMILES").append(OpsinTools.NEWLINE); + outputOptionsDesc.append("inchi for InChI (with FixedH)").append(OpsinTools.NEWLINE); + 
outputOptionsDesc.append("stdinchi for StdInChI").append(OpsinTools.NEWLINE); + outputOptionsDesc.append("stdinchikey for StdInChIKey"); + outputBuilder.desc(outputOptionsDesc.toString()); + options.addOption(outputBuilder.build()); options.addOption("h", "help", false, "Displays the allowed command line flags"); options.addOption("v", "verbose", false, "Enables debugging"); @@ -322,7 +362,7 @@ /** * Uses the command line parameters to configure a new NameToStructureConfig * @param cmd - * @return + * @return The configured NameToStructureConfig */ private static NameToStructureConfig generateOpsinConfigObjectFromCmd(CommandLine cmd) { NameToStructureConfig n2sconfig = new NameToStructureConfig(); @@ -334,52 +374,41 @@ return n2sconfig; } - private static void interactiveCmlOutput(InputStream input, OutputStream out, NameToStructureConfig n2sconfig) throws IOException { + private static void interactiveCmlOutput(InputStream input, OutputStream out, NameToStructureConfig n2sconfig) throws IOException, XMLStreamException { NameToStructure nts = NameToStructure.getInstance(); BufferedReader inputReader = new BufferedReader(new InputStreamReader(input, "UTF-8")); - StreamSerializer serializer = new StreamSerializer(out); - serializer.setIndent(2); - serializer.writeXMLDeclaration(); - Element cml = new Element("cml", XmlDeclarations.CML_NAMESPACE); - cml.addAttribute(new Attribute("convention","conventions:molecular")); - cml.addNamespaceDeclaration("conventions", "http://www.xml-cml.org/convention/"); - cml.addNamespaceDeclaration("cmlDict", "http://www.xml-cml.org/dictionary/cml/"); - cml.addNamespaceDeclaration("nameDict", "http://www.xml-cml.org/dictionary/cml/name/"); - serializer.writeStartTag(cml); - int id =1; + XMLOutputFactory factory = new WstxOutputFactory(); + factory.setProperty(WstxOutputProperties.P_OUTPUT_ESCAPE_CR, false); + XMLStreamWriter writer = factory.createXMLStreamWriter(out, "UTF-8"); + writer = new IndentingXMLStreamWriter(writer, 2); + 
writer.writeStartDocument(); + CMLWriter cmlWriter = new CMLWriter(writer); + cmlWriter.writeCmlStart(); + int id = 1; String name; while((name =inputReader.readLine()) != null) { OpsinResult result = nts.parseChemicalName(name, n2sconfig); - Element output = result.getCml(); - if(output == null) { + Fragment structure = result.getStructure(); + cmlWriter.writeMolecule(structure, name, id++); + writer.flush(); + if(structure == null) { System.err.println(result.getMessage()); - Element uninterpretableMolecule = new Element("molecule", XmlDeclarations.CML_NAMESPACE); - uninterpretableMolecule.addAttribute(new Attribute("id", "m" + id++)); - Element nameEl = new Element("name", XmlDeclarations.CML_NAMESPACE); - nameEl.appendChild(name); - nameEl.addAttribute(new Attribute("dictRef", "nameDict:unknown")); - uninterpretableMolecule.appendChild(nameEl); - serializer.write(uninterpretableMolecule); - serializer.flush(); - } else { - Element molecule = XOMTools.getChildElementsWithTagName(output, "molecule").get(0); - molecule.getAttribute("id").setValue("m" + id++); - serializer.write(molecule); - serializer.flush(); } } - serializer.writeEndTag(cml); - serializer.flush(); + cmlWriter.writeCmlEnd(); + writer.writeEndDocument(); + writer.flush(); + writer.close(); } - private static void interactiveSmilesOutput(InputStream input, OutputStream out, NameToStructureConfig n2sconfig) throws IOException { + private static void interactiveSmilesOutput(InputStream input, OutputStream out, NameToStructureConfig n2sconfig, boolean extendedSmiles) throws IOException { NameToStructure nts = NameToStructure.getInstance(); BufferedReader inputReader = new BufferedReader(new InputStreamReader(input, "UTF-8")); BufferedWriter outputWriter = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); String name; while((name =inputReader.readLine()) != null) { OpsinResult result = nts.parseChemicalName(name, n2sconfig); - String output = result.getSmiles(); + String output = extendedSmiles 
? result.getExtendedSmiles() : result.getSmiles(); if(output == null) { System.err.println(result.getMessage()); } else { @@ -390,7 +419,7 @@ } } - private static void interactiveInchiOutput(InputStream input, OutputStream out, NameToStructureConfig n2sconfig, boolean produceStdInChI) throws Exception { + private static void interactiveInchiOutput(InputStream input, OutputStream out, NameToStructureConfig n2sconfig, InchiType inchiType) throws Exception { NameToStructure nts = NameToStructure.getInstance(); BufferedReader inputReader = new BufferedReader(new InputStreamReader(input, "UTF-8")); BufferedWriter outputWriter = new BufferedWriter(new OutputStreamWriter(out, "UTF-8")); @@ -402,17 +431,24 @@ throw new RuntimeException(e); } Method m; - if (produceStdInChI){ - m = c.getMethod("convertResultToStdInChI", new Class[]{OpsinResult.class}); - } - else{ + switch (inchiType) { + case inchiWithFixedH: m = c.getMethod("convertResultToInChI", new Class[]{OpsinResult.class}); + break; + case stdInchi: + m = c.getMethod("convertResultToStdInChI", new Class[]{OpsinResult.class}); + break; + case stdInchiKey: + m = c.getMethod("convertResultToStdInChIKey", new Class[]{OpsinResult.class}); + break; + default : + throw new IllegalArgumentException("Unexepected enum value: " + inchiType); } String name; while((name =inputReader.readLine()) != null) { OpsinResult result = nts.parseChemicalName(name, n2sconfig); - String output = (String) m.invoke(null, new Object[]{result}); + String output = (String) m.invoke(null, result); if(output == null) { System.err.println(result.getMessage()); } else { diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinRadixTrie.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinRadixTrie.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinRadixTrie.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinRadixTrie.java 
2017-07-23 20:55:18.000000000 +0000 @@ -47,30 +47,29 @@ List findMatches(String chemicalName, int posInName) { int untokenisedChemicalNameLength = chemicalName.length(); List indexes = null; - if (rootNode.isEndPoint()){ + if (rootNode.isEndPoint()) { indexes = new ArrayList(); indexes.add(posInName); } - OpsinTrieNode currentNode = rootNode; + OpsinTrieNode node = rootNode; for (int i = posInName; i < untokenisedChemicalNameLength; i++) { - OpsinTrieNode node = currentNode.getChild(chemicalName.charAt(i)); - if (node != null) { - currentNode = node; - int charsMatched = currentNode.getNumberOfMatchingCharacters(chemicalName, i); - i += (charsMatched - 1); - if (charsMatched == currentNode.getValue().length()){ - if (currentNode.isEndPoint()) { - if (indexes == null) { - indexes = new ArrayList(); - } - indexes.add(i + 1); - } - } - else{ + node = node.getChild(chemicalName.charAt(i)); + if (node == null) { + break; + } + int nodeLength = node.getValue().length(); + if (nodeLength > 1) { + int charsMatched = node.getNumberOfMatchingCharacters(chemicalName, i); + if (charsMatched != nodeLength) { break; } - } else { - break; + i += (charsMatched - 1); + } + if (node.isEndPoint()) { + if (indexes == null) { + indexes = new ArrayList(); + } + indexes.add(i + 1); } } return indexes; @@ -84,30 +83,29 @@ */ List findMatchesReadingStringRightToLeft(String chemicalName, int posInName ) { List indexes = null; - if (rootNode.isEndPoint()){ + if (rootNode.isEndPoint()) { indexes = new ArrayList(); indexes.add(posInName); } - OpsinTrieNode currentNode = rootNode; + OpsinTrieNode node = rootNode; for (int i = posInName - 1; i >=0; i--) { - OpsinTrieNode node = currentNode.getChild(chemicalName.charAt(i)); - if (node != null) { - currentNode = node; - int charsMatched = currentNode.getNumberOfMatchingCharactersInReverse(chemicalName, i); - i -= (charsMatched - 1); - if (charsMatched == currentNode.getValue().length()){ - if (currentNode.isEndPoint()) { - if (indexes == null) { - 
indexes = new ArrayList(); - } - indexes.add(i); - } - } - else{ + node = node.getChild(chemicalName.charAt(i)); + if (node == null) { + break; + } + int nodeLength = node.getValue().length(); + if (nodeLength > 1) { + int charsMatched = node.getNumberOfMatchingCharactersInReverse(chemicalName, i); + if (charsMatched != nodeLength) { break; } - } else { - break; + i -= (charsMatched - 1); + } + if (node.isEndPoint()) { + if (indexes == null) { + indexes = new ArrayList(); + } + indexes.add(i); } } return indexes; diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinResult.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinResult.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinResult.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinResult.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,8 +1,11 @@ package uk.ac.cam.ch.wwmm.opsin; +import java.util.Collections; +import java.util.List; + import org.apache.log4j.Logger; -import nu.xom.Element; +import uk.ac.cam.ch.wwmm.opsin.OpsinWarning.OpsinWarningType; /** * Holds the structure OPSIN has generated from a name @@ -16,26 +19,55 @@ private final OPSIN_RESULT_STATUS status; private final String message; private final String chemicalName; - private Element cml = null; - private String smiles = null; + private final List warnings; /** - * Whether parsing the chemical name was successful, - * encountered problems or was unsuccessful. + * Whether parsing the chemical name was successful, encountered problems or was unsuccessful.
+ * If the result is not {@link OPSIN_RESULT_STATUS#FAILURE} then a structure has been generated * @author dl387 * */ public enum OPSIN_RESULT_STATUS{ + /** + * OPSIN successfully interpreted the name + */ SUCCESS, + /** + * OPSIN interpreted the name but detected a potential problem e.g. could not interpret stereochemistry
+ * Currently, by default, WARNING is not used as stereochemistry failures are treated as failures
+ * In the future, ambiguous chemical names may produce WARNING + */ WARNING, + /** + * OPSIN failed to interpret the name + */ FAILURE } + + OpsinResult(Fragment frag, OPSIN_RESULT_STATUS status, List warnings, String chemicalName) { + this.structure = frag; + this.status = status; + StringBuilder sb = new StringBuilder(); + for (int i = 0, l = warnings.size(); i < l; i++) { + OpsinWarning warning = warnings.get(i); + sb.append(warning.getType().toString()); + sb.append(": "); + sb.append(warning.getMessage()); + if (i + 1 < l){ + sb.append("; "); + } + } + this.message = sb.toString(); + this.chemicalName = chemicalName; + this.warnings = warnings; + } - OpsinResult(Fragment frag, OPSIN_RESULT_STATUS status, String message, String chemicalName){ + OpsinResult(Fragment frag, OPSIN_RESULT_STATUS status, String message, String chemicalName) { this.structure = frag; this.status = status; this.message = message; this.chemicalName = chemicalName; + this.warnings = Collections.emptyList(); } Fragment getStructure() { @@ -45,7 +77,7 @@ /** * Returns an enum with values SUCCESS, WARNING and FAILURE * Currently warning is never used - * @return OPSIN_RESULT_STATUS status + * @return {@link OPSIN_RESULT_STATUS} status */ public OPSIN_RESULT_STATUS getStatus() { return status; @@ -54,55 +86,123 @@ /** * Returns a message explaining why generation of a molecule from the name failed * This string will be blank when no problems were encountered - * @return String message + * @return String explaining problems encountered */ public String getMessage() { return message; } /** - * Returns the chemical name that this OpsinResult was generated frm - * @return String chemicalName + * Returns the chemical name that this OpsinResult was generated from + * @return String containing the original chemical name */ public String getChemicalName() { return chemicalName; } /** - * Lazily evaluates and returns the CML corresponding to the molecule described by the name + * Generates the CML 
corresponding to the molecule described by the name + * If name generation failed i.e. the OPSIN_RESULT_STATUS is FAILURE then null is returned + * @return Chemical Markup Language as a String + */ + public String getCml() { + if (structure != null){ + try{ + return CMLWriter.generateCml(structure, chemicalName); + } + catch (Exception e) { + LOG.debug("CML generation failed", e); + } + } + return null; + } + + /** + * Generates the CML corresponding to the molecule described by the name * If name generation failed i.e. the OPSIN_RESULT_STATUS is FAILURE then null is returned - * @return Element cml + * The CML is indented + * @return Idented Chemical Markup Language as a String */ - public synchronized Element getCml() { - if (cml ==null && structure!=null){ + public String getPrettyPrintedCml() { + if (structure != null){ try{ - cml = structure.toCMLMolecule(chemicalName); + return CMLWriter.generateIndentedCml(structure, chemicalName); } catch (Exception e) { LOG.debug("CML generation failed", e); - cml = null; } } - return cml; + return null; } /** - * Lazily evaluates and returns the SMILES corresponding to the molecule described by the name + * Generates the SMILES corresponding to the molecule described by the name * If name generation failed i.e. the OPSIN_RESULT_STATUS is FAILURE then null is returned - * @return String smiles + * @return SMILES as a String */ - public synchronized String getSmiles() { - if (smiles ==null && structure!=null){ + public String getSmiles() { + if (structure != null){ try{ - smiles = new SMILESWriter(structure).generateSmiles(); + return SMILESWriter.generateSmiles(structure); } catch (Exception e) { LOG.debug("SMILES generation failed", e); - smiles = null; } } - return smiles; + return null; } + /** + * Experimental function that generates the extended SMILES corresponding to the molecule described by the name + * If name generation failed i.e. 
the OPSIN_RESULT_STATUS is FAILURE then null is returned + * If the molecule doesn't utilise any features made possible by extended SMILES this is equivalent to {@link #getSmiles()} + * @return Extended SMILES as a String + */ + public String getExtendedSmiles() { + if (structure != null){ + try{ + return SMILESWriter.generateExtendedSmiles(structure); + } + catch (Exception e) { + LOG.debug("Extended SMILES generation failed", e); + } + } + return null; + } + + /** + * A list of warnings encountered when the result was {@link OPSIN_RESULT_STATUS#WARNING}
+ * This list of warnings is immutable + * @return A list of {@link OpsinWarning} + */ + public List getWarnings() { + return Collections.unmodifiableList(warnings); + } + /** + * Convenience method to check if one of the associated OPSIN warnings was {@link OpsinWarningType#APPEARS_AMBIGUOUS} + * @return true if name appears to be ambiguous + */ + public boolean nameAppearsToBeAmbiguous() { + for (OpsinWarning warning : warnings) { + if (warning.getType() == OpsinWarningType.APPEARS_AMBIGUOUS) { + return true; + } + } + return false; + } + + /** + * Convenience method to check if one of the associated OPSIN warnings was {@link OpsinWarningType#STEREOCHEMISTRY_IGNORED} + * @return true if stereochemistry was ignored to interpret the name + */ + public boolean stereochemistryIgnored() { + for (OpsinWarning warning : warnings) { + if (warning.getType() == OpsinWarningType.STEREOCHEMISTRY_IGNORED) { + return true; + } + } + return false; + } + } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinTools.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinTools.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinTools.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinTools.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,19 +1,13 @@ package uk.ac.cam.ch.wwmm.opsin; +import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.Deque; import java.util.HashSet; -import java.util.LinkedList; import java.util.List; import java.util.Set; import java.util.regex.Pattern; - - -import nu.xom.Attribute; -import nu.xom.Element; -import nu.xom.Elements; -import nu.xom.Node; - import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; /** @@ -22,116 +16,99 @@ * */ class OpsinTools { - static final Pattern MATCH_COLON = Pattern.compile(":"); + static final Pattern MATCH_COLONORSEMICOLON = Pattern.compile("[:;]"); - static final Pattern MATCH_COMMA = 
Pattern.compile(","); - static final Pattern MATCH_DASH = Pattern.compile("-"); - static final Pattern MATCH_SEMICOLON = Pattern.compile(";"); - static final Pattern MATCH_SLASH = Pattern.compile("/"); - static final Pattern MATCH_SPACE =Pattern.compile(" "); - static final Pattern MATCH_WHITESPACE = Pattern.compile("\\s+"); - + static final Pattern MATCH_AMINOACID_STYLE_LOCANT =Pattern.compile("([A-Z][a-z]?)('*)((\\d+[a-z]?|alpha|beta|gamma|delta|epsilon|zeta|eta|omega)'*)"); static final Pattern MATCH_ELEMENT_SYMBOL =Pattern.compile("[A-Z][a-z]?"); static final Pattern MATCH_ELEMENT_SYMBOL_LOCANT =Pattern.compile("[A-Z][a-z]?'*"); - static final Pattern MATCH_NUMERIC_LOCANT =Pattern.compile("\\d+[a-z]?'*"); + static final Pattern MATCH_NUMERIC_LOCANT =Pattern.compile("(\\d+)[a-z]?'*"); static final char END_OF_SUBSTITUENT = '\u00e9'; static final char END_OF_MAINGROUP = '\u00e2'; static final char END_OF_FUNCTIONALTERM = '\u00FB'; + static final String NEWLINE = System.getProperty("line.separator"); + /** * Returns the next sibling suffix node which is not related to altering charge (ium/ide/id) * @param currentEl */ static Element getNextNonChargeSuffix(Element currentEl) { - Element next = (Element) XOMTools.getNextSibling(currentEl); + Element next = getNextSibling(currentEl); while (next != null) { - if (next.getLocalName().equals(SUFFIX_EL) && !CHARGE_TYPE_VAL.equals(next.getAttributeValue(TYPE_ATR))){ + if (next.getName().equals(SUFFIX_EL) && !CHARGE_TYPE_VAL.equals(next.getAttributeValue(TYPE_ATR))){ return next; } - next = (Element) XOMTools.getNextSibling(next); + next = getNextSibling(next); } return null; } /** - * Returns an arrayList of elements corresponding to the Elements given - * @param elements - * @return The new arrayList - */ - static ArrayList elementsToElementArrayList(Elements elements) { - ArrayList elementList =new ArrayList(elements.size()); - for (int i = 0, n=elements.size(); i < n; i++) { - elementList.add(elements.get(i)); - } - 
return elementList; - } - - /** * Returns a new list containing the elements of list1 followed by list2 * @param list1 * @param list2 * @return The new list */ - static ArrayList combineElementLists(List list1, List list2) { - ArrayList elementList =new ArrayList(list1); + static List combineElementLists(List list1, List list2) { + List elementList = new ArrayList(list1); elementList.addAll(list2); return elementList; } /** * Returns the previous group. This group element need not be a sibling - * @param current: starting node + * @param current: starting element * @return */ - static Node getPreviousGroup(Element current) { - if (current.getLocalName().equals(GROUP_EL)){//can start with a group or the sub/root the group is in - current=(Element)current.getParent(); + static Element getPreviousGroup(Element current) { + if (current.getName().equals(GROUP_EL)) {//can start with a group or the sub/root the group is in + current = current.getParent(); } - Element parent = (Element) current.getParent(); - if (parent == null || parent.getLocalName().equals(WORDRULE_EL)){ + Element parent = current.getParent(); + if (parent == null || parent.getName().equals(WORDRULE_EL)) { return null; } int index = parent.indexOf(current); - if (index ==0) return getPreviousGroup(parent);//no group found - Element previous =(Element) parent.getChild(index-1); - Elements children =previous.getChildElements(); - while (children.size()!=0){ - previous =children.get(children.size()-1); - children =previous.getChildElements(); + if (index ==0) { + return getPreviousGroup(parent);//no group found + } + Element previous = parent.getChild(index - 1); + while (previous.getChildCount() != 0) { + previous = previous.getChild(previous.getChildCount() - 1); } - Elements groups =((Element)previous.getParent()).getChildElements(GROUP_EL); - if (groups.size()==0){ + List groups = previous.getParent().getChildElements(GROUP_EL); + if (groups.size() == 0){ return getPreviousGroup(previous); } else{ - 
return groups.get(groups.size()-1);//return last group if multiple exist e.g. fused ring + return groups.get(groups.size() - 1);//return last group if multiple exist e.g. fused ring } } /** * Returns the next group. This group element need not be a sibling - * @param current: starting node + * @param current: starting element * @return */ - static Node getNextGroup(Element current) { - if (current.getLocalName().equals(GROUP_EL)){//can start with a group or the sub/root the group is in - current=(Element)current.getParent(); + static Element getNextGroup(Element current) { + if (current.getName().equals(GROUP_EL)) {//can start with a group or the sub/root the group is in + current = current.getParent(); } - Element parent = (Element) current.getParent(); - if (parent == null || parent.getLocalName().equals(MOLECULE_EL)){ + Element parent = current.getParent(); + if (parent == null || parent.getName().equals(MOLECULE_EL)) { return null; } int index = parent.indexOf(current); - if (index ==parent.getChildElements().size()-1) return getNextGroup(parent);//no group found - Element next =(Element) parent.getChild(index +1); - Elements children =next.getChildElements(); - while (children.size()!=0){ - next =children.get(0); - children =next.getChildElements(); + if (index == parent.getChildCount() - 1) { + return getNextGroup(parent);//no group found } - Elements groups =((Element)next.getParent()).getChildElements(GROUP_EL); - if (groups.size()==0){ + Element next = parent.getChild(index + 1); + while (next.getChildCount() != 0){ + next = next.getChild(0); + } + List groups = next.getParent().getChildElements(GROUP_EL); + if (groups.size() == 0){ return getNextGroup(next); } else{ @@ -146,11 +123,11 @@ * @return wordRule Element */ static Element getParentWordRule(Element el) { - Element parent=(Element)el.getParent(); - while(parent !=null && !parent.getLocalName().equals(WORDRULE_EL)){ - parent =(Element)parent.getParent(); + Element parent = el.getParent(); + 
while(parent != null && !parent.getName().equals(WORDRULE_EL)){ + parent = parent.getParent(); } - if (parent==null){ + if (parent == null){ throw new RuntimeException("Cannot find enclosing wordRule element"); } else{ @@ -158,21 +135,6 @@ } } - /**Makes a shallow copy of an element, copying the element - * and the attributes, but no other child nodes. - * - * @param elem The element to copy. - * @return The copied element. - */ - static Element shallowCopy(Element elem) { - Element newElem = new Element(elem.getLocalName()); - int attributeCount = elem.getAttributeCount(); - for(int i=0; i < attributeCount;i++) { - newElem.addAttribute(new Attribute(elem.getAttribute(i))); - } - return newElem; - } - /** * Searches in a depth-first manner for a non-suffix atom that has the target non element symbol locant * Returns either that atom or null if one cannot be found @@ -181,11 +143,11 @@ * @return the matching atom or null */ static Atom depthFirstSearchForNonSuffixAtomWithLocant(Atom startingAtom, String targetLocant) { - LinkedList stack = new LinkedList(); + Deque stack = new ArrayDeque(); stack.add(startingAtom); - Set atomsVisited =new HashSet(); + Set atomsVisited = new HashSet(); while (stack.size() > 0) { - Atom currentAtom =stack.removeLast(); + Atom currentAtom = stack.removeLast(); atomsVisited.add(currentAtom); List neighbours = currentAtom.getAtomNeighbours(); for (Atom neighbour : neighbours) { @@ -196,12 +158,10 @@ locants.removeAll(neighbour.getElementSymbolLocants()); //A main group atom, would expect to only find one except in something strange like succinimide - //The locants.size()>0 condition allows things like terephthalate to work which have an atom between the suffixes and main atoms that has no locant - if (locants.size()>0 && !neighbour.getType().equals(SUFFIX_TYPE_VAL)){ - for (String neighbourLocant : locants) { - if (targetLocant.equals(neighbourLocant)){ - return neighbour; - } + //The locants.size() > 0 condition allows things like 
terephthalate to work which have an atom between the suffixes and main atoms that has no locant + if (locants.size() > 0 && !neighbour.getType().equals(SUFFIX_TYPE_VAL)){ + if (locants.contains(targetLocant)){ + return neighbour; } continue; } @@ -218,11 +178,11 @@ * @return the matching atom or null */ static Atom depthFirstSearchForAtomWithNumericLocant(Atom startingAtom){ - LinkedList stack = new LinkedList(); + Deque stack = new ArrayDeque(); stack.add(startingAtom); - Set atomsVisited =new HashSet(); + Set atomsVisited = new HashSet(); while (stack.size() > 0) { - Atom currentAtom =stack.removeLast(); + Atom currentAtom = stack.removeLast(); atomsVisited.add(currentAtom); List neighbours = currentAtom.getAtomNeighbours(); for (Atom neighbour : neighbours) { @@ -248,14 +208,14 @@ * @throws ParsingException */ static WordType determineWordType(List annotations) throws ParsingException { - Character finalAnnotation = annotations.get(annotations.size() -1); - if (finalAnnotation.equals(END_OF_MAINGROUP)){ + char finalAnnotation = annotations.get(annotations.size() - 1); + if (finalAnnotation == END_OF_MAINGROUP) { return WordType.full; } - else if (finalAnnotation.equals(END_OF_SUBSTITUENT)){ + else if (finalAnnotation == END_OF_SUBSTITUENT) { return WordType.substituent; } - else if (finalAnnotation.equals(END_OF_FUNCTIONALTERM)){ + else if (finalAnnotation == END_OF_FUNCTIONALTERM) { return WordType.functionalTerm; } else{ @@ -263,4 +223,437 @@ } } + + /**Gets the next sibling of a given element. + * + * @param element The reference element. + * @return The next Sibling, or null. + */ + static Element getNextSibling(Element element) { + Element parent = element.getParent(); + int i = parent.indexOf(element); + if (i + 1 >= parent.getChildCount()) { + return null; + } + return parent.getChild(i + 1); + } + + /**Gets the first next sibling of a given element whose element name matches the given string. + * + * @param current The reference element. 
+ * @param elName The element name to look for + * @return The matched next Sibling, or null. + */ + static Element getNextSibling(Element current, String elName) { + Element next = getNextSibling(current); + while (next != null) { + if (next.getName().equals(elName)){ + return next; + } + next = getNextSibling(next); + } + return null; + } + + /**Gets the previous sibling of a given element. + * + * @param element The reference element. + * @return The previous Sibling, or null. + */ + static Element getPreviousSibling(Element element) { + Element parent = element.getParent(); + int i = parent.indexOf(element); + if (i == 0) { + return null; + } + return parent.getChild(i - 1); + } + + /**Gets the first previous sibling of a given element whose element name matches the given string. + * + * @param current The reference element. + * @param elName The element name of a element to look for + * @return The matched previous Sibling, or null. + */ + static Element getPreviousSibling(Element current, String elName) { + Element prev = getPreviousSibling(current); + while (prev != null) { + if (prev.getName().equals(elName)){ + return prev; + } + prev = getPreviousSibling(prev); + } + return null; + } + + /**Inserts a element so that it occurs before a reference element. The new element + * must not currently have a parent. + * + * @param element The reference element. + * @param newElement The new element to insert. + */ + static void insertBefore(Element element, Element newElement) { + Element parent = element.getParent(); + int i = parent.indexOf(element); + parent.insertChild(newElement, i); + } + + /**Inserts an element so that it occurs after a reference element. The new element + * must not currently have a parent. + * + * @param element The reference element. + * @param neweElement The new element to insert. 
+ */ + static void insertAfter(Element element, Element neweElement) { + Element parent = element.getParent(); + int i = parent.indexOf(element); + parent.insertChild(neweElement, i + 1); + } + + /** + * Gets the next element. This element need not be a sibling + * @param element: starting element + * @return + */ + static Element getNext(Element element) { + Element parent = element.getParent(); + if (parent == null || parent.getName().equals(XmlDeclarations.MOLECULE_EL)){ + return null; + } + int index = parent.indexOf(element); + if (index + 1 >= parent.getChildCount()) { + return getNext(parent);//reached end of element + } + Element next = parent.getChild(index + 1); + while (next.getChildCount() > 0){ + next = next.getChild(0); + } + return next; + } + + /** + * Gets the previous element. This element need not be a sibling + * @param element: starting element + * @return + */ + static Element getPrevious(Element element) { + Element parent = element.getParent(); + if (parent == null || parent.getName().equals(XmlDeclarations.MOLECULE_EL)){ + return null; + } + int index = parent.indexOf(element); + if (index == 0) { + return getPrevious(parent);//reached beginning of element + } + Element previous = parent.getChild(index - 1); + while (previous.getChildCount() > 0){ + previous = previous.getChild(previous.getChildCount() - 1); + } + return previous; + } + + /** + * Returns a list containing sibling elements with the given element name after the given element. 
+ * These elements need not be continuous + * @param currentElem: the element to look for following siblings of + * @param elName: the name of the elements desired + * @return + */ + static List getNextSiblingsOfType(Element currentElem, String elName) { + List laterSiblingElementsOfType = new ArrayList(); + Element parent = currentElem.getParent(); + if (parent == null){ + return laterSiblingElementsOfType; + } + int indexOfCurrentElem = parent.indexOf(currentElem); + for (int i = indexOfCurrentElem + 1; i < parent.getChildCount(); i++) { + Element child = parent.getChild(i); + if (child.getName().equals(elName)) { + laterSiblingElementsOfType.add(child); + } + } + return laterSiblingElementsOfType; + } + + /** + * Returns a list containing sibling elements with the given element name after the given element. + * @param currentElem: the element to look for following siblings of + * @param elName: the name of the elements desired + * @return + */ + static List getNextAdjacentSiblingsOfType(Element currentElem, String elName) { + List siblingElementsOfType = new ArrayList(); + Element parent = currentElem.getParent(); + if (parent == null){ + return siblingElementsOfType; + } + Element nextSibling = getNextSibling(currentElem); + while (nextSibling != null && nextSibling.getName().equals(elName)){ + siblingElementsOfType.add(nextSibling); + nextSibling = getNextSibling(nextSibling); + } + return siblingElementsOfType; + } + + /** + * Returns a list containing sibling elements with the given element names after the given element. 
+ * These elements need not be continuous and are returned in the order encountered + * @param currentElem: the element to look for following siblings of + * @param elNames: An array of the names of the elements desired + * @return + */ + static List getNextSiblingsOfTypes(Element currentElem, String[] elNames){ + List laterSiblingElementsOfTypes = new ArrayList(); + currentElem = getNextSibling(currentElem); + while (currentElem != null){ + String name = currentElem.getName(); + for (String elName : elNames) { + if (name.equals(elName)){ + laterSiblingElementsOfTypes.add(currentElem); + break; + } + } + currentElem = getNextSibling(currentElem); + } + return laterSiblingElementsOfTypes; + } + + /** + * Returns a list containing sibling elements with the given element name before the given element. + * These elements need not be continuous + * @param currentElem: the element to look for previous siblings of + * @param elName: the name of the elements desired + * @return + */ + static List getPreviousSiblingsOfType(Element currentElem, String elName) { + List earlierSiblingElementsOfType = new ArrayList(); + Element parent = currentElem.getParent(); + if (parent == null){ + return earlierSiblingElementsOfType; + } + int indexOfCurrentElem = parent.indexOf(currentElem); + for (int i = 0; i < indexOfCurrentElem; i++) { + Element child = parent.getChild(i); + if (child.getName().equals(elName)) { + earlierSiblingElementsOfType.add(child); + } + } + return earlierSiblingElementsOfType; + } + + /** + * Gets the next sibling element of the given element. 
If this element's name is within the elementsToIgnore array this is repeated + * If no appropriate element can be found null is returned + * @param startingEl + * @param elNamesToIgnore + * @return + */ + static Element getNextSiblingIgnoringCertainElements(Element startingEl, String[] elNamesToIgnore){ + Element parent = startingEl.getParent(); + if (parent == null){ + return null; + } + int i = parent.indexOf(startingEl); + if (i + 1 >= parent.getChildCount()) { + return null; + } + Element next = parent.getChild(i + 1); + String elName = next.getName(); + for (String namesToIgnore : elNamesToIgnore) { + if (elName.equals(namesToIgnore)){ + return getNextSiblingIgnoringCertainElements(next, elNamesToIgnore); + } + } + return next; + } + + /** + * Gets the previous sibling element of the given element. If this element's name is within the elementsToIgnore array this is repeated + * If no appropriate element can be found null is returned + * @param startingEl + * @param elNamesToIgnore + * @return + */ + static Element getPreviousSiblingIgnoringCertainElements(Element startingEl, String[] elNamesToIgnore){ + Element parent = startingEl.getParent(); + if (parent == null){ + return null; + } + int i = parent.indexOf(startingEl); + if (i == 0) { + return null; + } + Element previous = parent.getChild(i - 1); + String elName = previous.getName(); + for (String namesToIgnore : elNamesToIgnore) { + if (elName.equals(namesToIgnore)){ + return getPreviousSiblingIgnoringCertainElements(previous, elNamesToIgnore); + } + } + return previous; + } + + /** + * Finds all descendant elements whose name matches the given element name + * @param startingElement + * @param elementName + * @return + */ + static List getDescendantElementsWithTagName(Element startingElement, String elementName) { + List matchingElements = new ArrayList(); + Deque stack = new ArrayDeque(); + for (int i = startingElement.getChildCount() - 1; i >= 0; i--) { + stack.add(startingElement.getChild(i)); + } + 
while (stack.size() > 0){ + Element currentElement = stack.removeLast(); + if (currentElement.getName().equals(elementName)){ + matchingElements.add(currentElement); + } + for (int i = currentElement.getChildCount() - 1; i >= 0; i--) { + stack.add(currentElement.getChild(i)); + } + } + return matchingElements; + } + + /** + * Finds all descendant elements whose element name matches one of the strings in elementNames + * @param startingElement + * @param elementNames + * @return + */ + static List getDescendantElementsWithTagNames(Element startingElement, String[] elementNames) { + List matchingElements = new ArrayList(); + Deque stack = new ArrayDeque(); + for (int i = startingElement.getChildCount() - 1; i >= 0; i--) { + stack.add(startingElement.getChild(i)); + } + while (stack.size()>0){ + Element currentElement = stack.removeLast(); + String currentElName = currentElement.getName(); + for (String targetTagName : elementNames) { + if (currentElName.equals(targetTagName)){ + matchingElements.add(currentElement); + break; + } + } + for (int i = currentElement.getChildCount() - 1; i >= 0; i--) { + stack.add(currentElement.getChild(i)); + } + } + return matchingElements; + } + + /** + * Finds all child elements whose element name matches one of the strings in elementNames + * @param startingElement + * @param elementNames + * @return + */ + static List getChildElementsWithTagNames(Element startingElement, String[] elementNames) { + List matchingElements = new ArrayList(); + for (int i = 0, l = startingElement.getChildCount(); i < l; i++) { + Element child = startingElement.getChild(i); + String currentElName = child.getName(); + for (String targetTagName : elementNames) { + if (currentElName.equals(targetTagName)){ + matchingElements.add(child); + break; + } + } + } + return matchingElements; + } + + /** + * Finds all descendant elements whose element name matches the given elementName + * Additionally the element must have the specified attribute and the value of 
the attribute must be as specified + * @param startingElement + * @param elementName + * @param attributeName + * @param attributeValue + * @return + */ + static List getDescendantElementsWithTagNameAndAttribute(Element startingElement, String elementName, String attributeName, String attributeValue) { + List matchingElements = new ArrayList(); + Deque stack = new ArrayDeque(); + for (int i = startingElement.getChildCount() - 1; i >= 0; i--) { + stack.add(startingElement.getChild(i)); + } + while (stack.size() > 0){ + Element currentElement =stack.removeLast(); + if (currentElement.getName().equals(elementName)){ + if (attributeValue.equals(currentElement.getAttributeValue(attributeName))){ + matchingElements.add(currentElement); + } + } + for (int i = currentElement.getChildCount() - 1; i >= 0; i--) { + stack.add(currentElement.getChild(i)); + } + } + return matchingElements; + } + + /** + * Finds all child elements whose element name matches the given elementName + * Additionally the element must have the specified attribute and the value of the attribute must be as specified + * @param startingElement + * @param elementName + * @return + */ + static List getChildElementsWithTagNameAndAttribute(Element startingElement, String elementName, String attributeName, String attributeValue) { + List matchingElements = new ArrayList(); + for (int i = 0, l = startingElement.getChildCount(); i < l; i++) { + Element child = startingElement.getChild(i); + if (child.getName().equals(elementName)){ + if (attributeValue.equals(child.getAttributeValue(attributeName))){ + matchingElements.add(child); + } + } + } + return matchingElements; + } + + /** + * Finds and returns the number of elements and the number of elements with no children, that are descendants of the startingElement + * The 0th position of the returned array is the total number of elements + * The 1st position is the number of child less elements + * @param startingElement + * @return + */ + static int[] 
countNumberOfElementsAndNumberOfChildLessElements(Element startingElement) { + int[] counts = new int[2]; + Deque stack = new ArrayDeque(); + stack.add(startingElement); + while (stack.size() > 0){ + Element currentElement = stack.removeLast(); + int childCount = currentElement.getChildCount(); + if (childCount == 0) { + counts[1]++; + } + else{ + stack.addAll(currentElement.getChildElements()); + counts[0] += childCount; + } + } + return counts; + } + + /** + * Find all the later siblings of startingElement up until there is no more siblings or an + * element with the given element name is reached (exclusive of that element) + * @param startingEl + * @param elName + * @return + */ + static List getSiblingsUpToElementWithTagName(Element startingEl, String elName) { + List laterSiblings = new ArrayList(); + Element nextEl = getNextSibling(startingEl); + while (nextEl != null && !nextEl.getName().equals(elName)){ + laterSiblings.add(nextEl); + nextEl = getNextSibling(nextEl); + } + return laterSiblings; + } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinWarning.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinWarning.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinWarning.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/OpsinWarning.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,58 @@ +package uk.ac.cam.ch.wwmm.opsin; + +/** + * A warning generated by OPSIN while interpreting a name.
+ * The specifics of the warning may be used to judge whether you want to accept the generated structure. + */ +public class OpsinWarning { + + /** + * The type of problem OPSIN encountered + */ + public enum OpsinWarningType { + /**OPSIN ignored stereochemistry from the input name to give this structure. This can have various causes
: + * OPSIN doesn't support interpretation of the type of stereochemistry + * OPSIN stereo-perception doesn't support this type of stereocentre + * The name describes the wrong structure + * The stereochemistry is being requested at the wrong atom/bond */ + STEREOCHEMISTRY_IGNORED("Stereochemical term ignored"), + /**OPSIN made a choice that appeared to be ambiguous to give this structure i.e. the name may describe multiple possible structures
+ *The name may be missing locants
+ *Alternatively the name could actually be a trivial rather than systematic name
+ *OPSIN tries to make sensible choices when choosing in ambiguous cases so the resultant structure may nonetheless be the intended one*/ + APPEARS_AMBIGUOUS("This names appears to be ambiguous"); + + private final String explanation; + + private OpsinWarningType(String explanation) { + this.explanation = explanation; + } + + public String getExplanation() { + return explanation; + } + + } + + private final OpsinWarningType type; + private final String message; + + OpsinWarning(OpsinWarningType type, String message) { + this.type = type; + this.message = message; + } + + /** + * @return The type of the warning c.f. {@link OpsinWarningType} + */ + public OpsinWarningType getType() { + return type; + } + + /** + * @return The message describing the specific cause of this warning + */ + public String getMessage() { + return message; + } +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Parser.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Parser.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Parser.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Parser.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,19 +1,17 @@ package uk.ac.cam.ch.wwmm.opsin; import java.io.IOException; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; -import java.util.LinkedList; +import java.util.Deque; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; - -import nu.xom.Attribute; -import nu.xom.Element; +import org.apache.log4j.Logger; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; -import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; /**Conducts finite-state parsing on chemical names. * Adds XML annotation to the semantic constituents of the name. 
@@ -31,15 +29,16 @@ private final ResourceManager resourceManager; private final ParseRules parseRules; - private final static Pattern matchSemiColonSpace = Pattern.compile("; "); - private final static Pattern matchStoichiometryIndication = Pattern.compile("[ ]?[\\{\\[\\(](\\d+|\\?)([:/](\\d+|\\?))+[\\}\\]\\)]$"); + private static final Pattern matchSemiColonSpace = Pattern.compile("; "); + private static final Pattern matchStoichiometryIndication = Pattern.compile("[ ]?[\\{\\[\\(](\\d+|\\?)([:/](\\d+|\\?))+[\\}\\]\\)]$"); + private static final Logger LOG = Logger.getLogger(Parser.class); /** * No-argument constructor. Uses ResouceGetter found at * uk/ac/cam/ch/wwmm/opsin/resources/ * @throws IOException */ - Parser() throws IOException{ + Parser() throws IOException { ResourceGetter resources = new ResourceGetter("uk/ac/cam/ch/wwmm/opsin/resources/"); this.wordRules = new WordRules(resources); this.resourceManager = new ResourceManager(resources); @@ -92,8 +91,9 @@ parse = tokenizationResult.getParse(); } } - boolean allowSpaceRemoval = parse ==null ? 
true : false; - if (parse == null){ + boolean allowSpaceRemoval; + if (parse == null) { + allowSpaceRemoval = true; TokenizationResult tokenizationResult = tokeniser.tokenize(name , true); if (tokenizationResult.isSuccessfullyTokenized()){ parse = tokenizationResult.getParse(); @@ -108,52 +108,68 @@ } } } + else { + allowSpaceRemoval = false; + } List parses = generateParseCombinations(parse); - if (parses.size()==0){ + if (parses.size()==0) { throw new ParsingException("No parses could be found for " + name); } List results = new ArrayList(); + ParsingException preciseException = null; for(Parse pp : parses) { - Element moleculeEl = new Element(MOLECULE_EL); + Element moleculeEl = new GroupingEl(MOLECULE_EL); moleculeEl.addAttribute(new Attribute(NAME_ATR, name)); for(ParseWord pw : pp.getWords()) { - Element word = new Element(WORD_EL); - moleculeEl.appendChild(word); - if (pw.getParseTokens().size() >1){ - throw new ParsingException("OPSIN bug: parseWord had multiple annotations after creating additional parses step"); + Element word = new GroupingEl(WORD_EL); + moleculeEl.addChild(word); + if (pw.getParseTokens().size() != 1){ + throw new ParsingException("OPSIN bug: parseWord should have exactly 1 annotations after creating additional parses step"); } - - WordType wordType = OpsinTools.determineWordType(pw.getParseTokens().get(0).getAnnotations()); + ParseTokens tokensForWord = pw.getParseTokens().get(0); + WordType wordType = OpsinTools.determineWordType(tokensForWord.getAnnotations()); word.addAttribute(new Attribute(TYPE_ATR, wordType.toString())); - if (pw.getWord().startsWith("-")){//we want -acid to be the same as acid + if (pw.getWord().startsWith("-")){//we want -functionalterm to be the same as functionalterm word.addAttribute(new Attribute(VALUE_ATR, pw.getWord().substring(1))); } else{ word.addAttribute(new Attribute(VALUE_ATR, pw.getWord())); } - for(ParseTokens pt : pw.getParseTokens()) { - writeWordXML(word, pw, pt.getTokens(), 
WordTools.chunkAnnotations(pt.getAnnotations())); - } + writeWordXML(word, tokensForWord.getTokens(), WordTools.chunkAnnotations(tokensForWord.getAnnotations())); } /* All words are placed into a wordRule. * Often multiple words in the same wordRule. * WordRules can be nested within each other e.g. in Carbonyl cyanide m-chlorophenyl hydrazone -> * Carbonyl cyanide m-chlorophenyl hydrazone */ + try { + wordRules.groupWordsIntoWordRules(moleculeEl, n2sConfig, allowSpaceRemoval, componentRatios); + } catch (ParsingException e) { + if(LOG.isDebugEnabled()) { + LOG.debug(e.getMessage(), e); + } + // Using that parse no word rules matched + continue; + } try{ - wordRules.groupWordsIntoWordRules(n2sConfig, moleculeEl, allowSpaceRemoval); - if (componentRatios!=null){ + if (componentRatios != null){ applyStoichiometryIndicationToWordRules(moleculeEl, componentRatios); } + if (moleculeEl.getAttributeValue(ISSALT_ATR) != null && moleculeEl.getChildElements(WORDRULE_EL).size() < 2) { + throw new ParsingException(name + " is apparently a salt, but the name only contained one component. The name could be describing a class of compounds"); + } results.add(moleculeEl); + } catch (ParsingException e) { + preciseException = e; } - catch (ParsingException e) { - // Using that parse no word rules matched - } + } - if (results.size()==0){ + if (results.size() == 0) { + if (preciseException != null) { + throw preciseException; + } throw new ParsingException(name + " could be parsed but OPSIN was unsure of the meaning of the words. 
This error will occur, by default, if a name is just a substituent"); } @@ -163,9 +179,9 @@ static Integer[] processStoichiometryIndication(String ratioString) throws ParsingException { ratioString = ratioString.trim(); ratioString = ratioString.substring(1, ratioString.length()-1); - String[] ratioStrings = MATCH_COLON.split(ratioString); + String[] ratioStrings = ratioString.split(":"); if (ratioStrings.length ==1){ - ratioStrings = MATCH_SLASH.split(ratioString); + ratioStrings = ratioString.split("/"); } Integer[] componentRatios = new Integer[ratioStrings.length]; for (int i = 0; i < ratioStrings.length; i++) { @@ -174,10 +190,10 @@ throw new ParsingException("Unexpected / in component ratio declaration"); } if (currentRatio.equals("?")){ - componentRatios[i]=1; + componentRatios[i] = 1; } else{ - componentRatios[i]=Integer.parseInt(currentRatio); + componentRatios[i] = Integer.parseInt(currentRatio); } } return componentRatios; @@ -213,7 +229,7 @@ message.append(" has no tokens unknown to OPSIN but does not conform to its grammar. "); message.append("From left to right it is unparsable due to the following being uninterpretable:"); message.append(uninterpretableLR); - message.append(" The following or which was not parseable: "); + message.append(" The following of which was not parseable: "); message.append(unparseableLR); } throw new ParsingException(message.toString()); @@ -229,11 +245,9 @@ */ private List generateParseCombinations(Parse parse) throws ParsingException { int numberOfCombinations = 1; - List parseCounts = new ArrayList(); List parseWords = parse.getWords(); for (ParseWord pw : parseWords) { int parsesForWord = pw.getParseTokens().size(); - parseCounts.add(parsesForWord); numberOfCombinations *= parsesForWord; if (numberOfCombinations > 128){//checked here to avoid integer overflow on inappropriate input throw new ParsingException("Too many different combinations of word interpretation are possible (>128) i.e. 
name contains too many terms that OPSIN finds ambiguous to interpret"); @@ -244,7 +258,7 @@ } List parses = new ArrayList(); - LinkedList parseQueue = new LinkedList(); + Deque parseQueue = new ArrayDeque(); parseQueue.add(new Parse(parse.getName())); while (!parseQueue.isEmpty()){ Parse currentParse = parseQueue.removeFirst(); @@ -270,33 +284,32 @@ /**Write the XML corresponding to a particular word in a parse. * * @param wordEl The empty XML word element to be written into. - * @param pw The ParseWord for the word. * @param tokens The list of tokens. * @param annotations The lists of annotations. This has been divided into a separate list per substituent/root/functionalTerm * @throws ParsingException */ - void writeWordXML(Element wordEl, ParseWord pw, List tokens, List> annotations) throws ParsingException { + void writeWordXML(Element wordEl, List tokens, List> annotations) throws ParsingException { int annotNumber = 0; int annotPos = 0; - Element chunk = new Element(SUBSTITUENT_EL); - wordEl.appendChild(chunk); + Element chunk = new GroupingEl(SUBSTITUENT_EL); + wordEl.addChild(chunk); Element lastTokenElement = null; for (String token : tokens) { if (annotPos >= annotations.get(annotNumber).size()) { annotPos = 0; annotNumber++; - chunk = new Element(SUBSTITUENT_EL); - wordEl.appendChild(chunk); + chunk = new GroupingEl(SUBSTITUENT_EL); + wordEl.addChild(chunk); lastTokenElement = null; } Element tokenElement = resourceManager.makeTokenElement(token, annotations.get(annotNumber).get(annotPos)); if (tokenElement != null) {//null for tokens that have ignoreWhenWritingXML set - chunk.appendChild(tokenElement); - lastTokenElement=tokenElement; + chunk.addChild(tokenElement); + lastTokenElement = tokenElement; } - else if (lastTokenElement!=null && !token.equals("")){ - if (lastTokenElement.getAttribute(SUBSEQUENTUNSEMANTICTOKEN_ATR)!=null){ + else if (lastTokenElement!=null && token.length() > 0){ + if (lastTokenElement.getAttribute(SUBSEQUENTUNSEMANTICTOKEN_ATR) 
!= null){ lastTokenElement.getAttribute(SUBSEQUENTUNSEMANTICTOKEN_ATR).setValue(lastTokenElement.getAttributeValue(SUBSEQUENTUNSEMANTICTOKEN_ATR) + token); } else{ @@ -307,10 +320,10 @@ } WordType wordType = WordType.valueOf(wordEl.getAttributeValue(TYPE_ATR)); if(wordType == WordType.full) { - chunk.setLocalName(ROOT_EL); + chunk.setName(ROOT_EL); } else if(wordType == WordType.functionalTerm) { - chunk.setLocalName(FUNCTIONALTERM_EL); + chunk.setName(FUNCTIONALTERM_EL); } } @@ -322,7 +335,7 @@ * @throws ParsingException */ private void applyStoichiometryIndicationToWordRules(Element moleculeEl,Integer[] componentRatios) throws ParsingException { - List wordRules = XOMTools.getChildElementsWithTagName(moleculeEl, WORDRULE_EL); + List wordRules = moleculeEl.getChildElements(WORDRULE_EL); if (wordRules.size()!=componentRatios.length){ throw new ParsingException("Component and stoichiometry indication indication mismatch. OPSIN believes there to be " +wordRules.size() +" components but " + componentRatios.length +" ratios were given!"); } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ParseRules.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ParseRules.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ParseRules.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ParseRules.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,7 +1,8 @@ package uk.ac.cam.ch.wwmm.opsin; +import java.util.ArrayDeque; import java.util.ArrayList; -import java.util.LinkedList; +import java.util.Collections; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -12,7 +13,7 @@ * Instantiate via NameToStructure.getOpsinParser() * * Performs finite-state allocation of roles ("annotations") to tokens: - * The chemical name is broken down into tokens e.g. 
ethyl -->eth yl by applying the chemical grammar in regexes.xml + * The chemical name is broken down into tokens e.g. ethyl -->eth yl by applying the chemical grammar in regexes.xml * The tokens eth and yl are associated with a letter which is referred to here as an annotation which is the role of the token. * These letters are defined in regexes.xml and would in this case have the meaning alkaneStem and inlineSuffix * @@ -25,33 +26,28 @@ */ public class ParseRules { - /** A "struct" containing bits of state needed during finite-state parsing. */ - private static class AnnotatorState { - /** The current state of the DFA. */ - int state; - /** The annotation so far. */ - List annot; - /** The strings these annotations correspond to. */ - ArrayList tokens; - /** The index of the first char in the chemical name that has yet to be tokenised */ - int posInName; - } - /** A DFA encompassing the grammar of a chemical word. */ - private RunAutomaton chemAutomaton; + private final RunAutomaton chemAutomaton; /** The allowed symbols in chemAutomaton */ - private char[] stateSymbols; - - private final ResourceManager resourceManager; + private final char[] stateSymbols; + + private final OpsinRadixTrie[] symbolTokenNamesDict; + private final RunAutomaton[] symbolRegexAutomataDict; + private final Pattern[] symbolRegexesDict; + + private final AnnotatorState initialState; /** * Creates a left to right parser that can parse a substituent/full/functional word * @param resourceManager */ ParseRules(ResourceManager resourceManager){ - this.resourceManager = resourceManager; - chemAutomaton = resourceManager.chemicalAutomaton; - stateSymbols = chemAutomaton.getCharIntervals(); + this.chemAutomaton = resourceManager.getChemicalAutomaton(); + this.symbolTokenNamesDict = resourceManager.getSymbolTokenNamesDict(); + this.symbolRegexAutomataDict = resourceManager.getSymbolRegexAutomataDict(); + this.symbolRegexesDict = resourceManager.getSymbolRegexesDict(); + this.stateSymbols = 
chemAutomaton.getCharIntervals(); + this.initialState = new AnnotatorState(chemAutomaton.getInitialState(), '\0', 0, true, null); } /**Determines the possible annotations for a chemical word @@ -61,101 +57,73 @@ * For something like eth no parses would be found and the string will equal "eth" * * @param chemicalWord - * @return + * @return Results of parsing * @throws ParsingException */ public ParseRulesResults getParses(String chemicalWord) throws ParsingException { - String chemicalWordLowerCase = chemicalWord.toLowerCase(); - AnnotatorState startingAS = new AnnotatorState(); - startingAS.state = chemAutomaton.getInitialState(); - startingAS.annot = new ArrayList(); - startingAS.tokens = new ArrayList(); - startingAS.posInName = 0; - LinkedList asStack = new LinkedList(); - asStack.add(startingAS); + String chemicalWordLowerCase = StringTools.lowerCaseAsciiString(chemicalWord); + ArrayDeque asStack = new ArrayDeque(); + asStack.add(initialState); int posInNameOfLastSuccessfulAnnotations = 0; List successfulAnnotations = new ArrayList(); - AnnotatorState longestAnnotation = new AnnotatorState();//this is the longest annotation. It does not necessarily end in an accept state - longestAnnotation.state = chemAutomaton.getInitialState(); - longestAnnotation.annot = new ArrayList(); - longestAnnotation.tokens = new ArrayList(); - longestAnnotation.posInName = 0; + AnnotatorState longestAnnotation = initialState;//this is the longest annotation. 
It does not necessarily end in an accept state int stateSymbolsSize = stateSymbols.length; while (!asStack.isEmpty()) { AnnotatorState as = asStack.removeFirst(); - int posInName = as.posInName; - if (chemAutomaton.isAccept(as.state)){ + int posInName = as.getPosInName(); + if (chemAutomaton.isAccept(as.getState())){ if (posInName >= posInNameOfLastSuccessfulAnnotations){//this annotation is worthy of consideration if (posInName > posInNameOfLastSuccessfulAnnotations){//this annotation is longer than any previously found annotation successfulAnnotations.clear(); posInNameOfLastSuccessfulAnnotations = posInName; } - else if (successfulAnnotations.size()>128){ + else if (successfulAnnotations.size() > 128){ throw new ParsingException("Ambiguity in OPSIN's chemical grammar has produced more than 128 annotations. Parsing has been aborted. Please report this as a bug"); } successfulAnnotations.add(as); } } //record the longest annotation found so it can be reported to the user for debugging - if (posInName > longestAnnotation.posInName){ + if (posInName > longestAnnotation.getPosInName()){ longestAnnotation = as; } for (int i = 0; i < stateSymbolsSize; i++) { char annotationCharacter = stateSymbols[i]; - int potentialNextState = chemAutomaton.step(as.state, annotationCharacter); + int potentialNextState = chemAutomaton.step(as.getState(), annotationCharacter); if (potentialNextState != -1) {//-1 means this state is not accessible from the previous state - OpsinRadixTrie possibleTokenisationsTrie = resourceManager.symbolTokenNamesDict[i]; + OpsinRadixTrie possibleTokenisationsTrie = symbolTokenNamesDict[i]; if (possibleTokenisationsTrie != null) { List possibleTokenisations = possibleTokenisationsTrie.findMatches(chemicalWordLowerCase, posInName); if (possibleTokenisations != null) {//next could be a token - for (int tokenizationIndex : possibleTokenisations) { - AnnotatorState newAs = new AnnotatorState(); - newAs.posInName = tokenizationIndex; - newAs.tokens = new 
ArrayList(as.tokens); - newAs.tokens.add(chemicalWordLowerCase.substring(posInName, tokenizationIndex)); - newAs.annot = new ArrayList(as.annot); - newAs.annot.add(annotationCharacter); - newAs.state = potentialNextState; + for (int j = 0, l = possibleTokenisations.size(); j < l; j++) {//typically list size will be 1 so this is faster than an iterator + int tokenizationIndex = possibleTokenisations.get(j); + AnnotatorState newAs = new AnnotatorState(potentialNextState, annotationCharacter, tokenizationIndex, false, as); //System.out.println("tokened " + chemicalWordLowerCase.substring(posInName, tokenizationIndex)); asStack.add(newAs); } } } - List possibleAutomata = resourceManager.symbolRegexAutomataDict[i]; + RunAutomaton possibleAutomata = symbolRegexAutomataDict[i]; if (possibleAutomata != null) {//next could be an automaton - for (RunAutomaton automaton : possibleAutomata) { - int matchLength = automaton.run(chemicalWord, posInName); - if (matchLength != -1){//matchLength = -1 means it did not match - AnnotatorState newAs = new AnnotatorState(); - newAs.posInName = posInName + matchLength; - newAs.tokens = new ArrayList(as.tokens); - newAs.tokens.add(chemicalWord.substring(posInName, posInName + matchLength)); - newAs.annot = new ArrayList(as.annot); - newAs.annot.add(annotationCharacter); - newAs.state = potentialNextState; - //System.out.println("neword automata " + chemicalWord.substring(posInName, posInName + matchLength)); - asStack.add(newAs); - } + int matchLength = possibleAutomata.run(chemicalWord, posInName); + if (matchLength != -1){//matchLength = -1 means it did not match + int tokenizationIndex = posInName + matchLength; + AnnotatorState newAs = new AnnotatorState(potentialNextState, annotationCharacter, tokenizationIndex, true, as); + //System.out.println("neword automata " + chemicalWord.substring(posInName, tokenizationIndex)); + asStack.add(newAs); } } - List possibleRegexes = resourceManager.symbolRegexesDict[i]; - if (possibleRegexes != 
null) {//next could be a regex - for (Pattern pattern : possibleRegexes) { - Matcher mat = pattern.matcher(chemicalWord).region(posInName, chemicalWord.length()); - if (mat.lookingAt()) {//match at start - AnnotatorState newAs = new AnnotatorState(); - String matchedString = mat.group(0); - newAs.posInName = posInName + matchedString.length(); - newAs.tokens = new ArrayList(as.tokens); - newAs.tokens.add(matchedString); - newAs.annot = new ArrayList(as.annot); - newAs.annot.add(annotationCharacter); - newAs.state = potentialNextState; - //System.out.println("neword regex " + matchedString); - asStack.add(newAs); - } + Pattern possibleRegex = symbolRegexesDict[i]; + if (possibleRegex != null) {//next could be a regex + Matcher mat = possibleRegex.matcher(chemicalWord).region(posInName, chemicalWord.length()); + mat.useTransparentBounds(true); + if (mat.lookingAt()) {//match at start + int tokenizationIndex = posInName + mat.group(0).length(); + AnnotatorState newAs = new AnnotatorState(potentialNextState, annotationCharacter, tokenizationIndex, true, as); + //System.out.println("neword regex " + mat.group(0)); + asStack.add(newAs); } } } @@ -163,16 +131,34 @@ } List outputList = new ArrayList(); String uninterpretableName = chemicalWord; - String unparseableName = chemicalWord.substring(longestAnnotation.posInName); + String unparseableName = chemicalWord.substring(longestAnnotation.getPosInName()); if (successfulAnnotations.size() > 0){//at least some of the name could be interpreted into a substituent/full/functionalTerm int bestAcceptPosInName = -1; for(AnnotatorState as : successfulAnnotations) { - ParseTokens pt = new ParseTokens(as.tokens, as.annot); - outputList.add(pt); - bestAcceptPosInName = as.posInName;//all acceptable annotator states found should have the same posInName + outputList.add(convertAnnotationStateToParseTokens(as, chemicalWord, chemicalWordLowerCase)); + bestAcceptPosInName = as.getPosInName();//all acceptable annotator states found should 
have the same posInName } uninterpretableName = chemicalWord.substring(bestAcceptPosInName); } return new ParseRulesResults(outputList, uninterpretableName, unparseableName); } + + private ParseTokens convertAnnotationStateToParseTokens(AnnotatorState as, String chemicalWord, String chemicalWordLowerCase) { + List tokens = new ArrayList(); + List annotations = new ArrayList(); + AnnotatorState previousAs; + while ((previousAs = as.getPreviousAs()) != null) { + if (as.isCaseSensitive()) { + tokens.add(chemicalWord.substring(previousAs.getPosInName(), as.getPosInName())); + } + else{ + tokens.add(chemicalWordLowerCase.substring(previousAs.getPosInName(), as.getPosInName())); + } + annotations.add(as.getAnnot()); + as = previousAs; + } + Collections.reverse(tokens); + Collections.reverse(annotations); + return new ParseTokens(tokens, annotations); + } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ParseRulesResults.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ParseRulesResults.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ParseRulesResults.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ParseRulesResults.java 2017-07-23 20:55:18.000000000 +0000 @@ -22,7 +22,7 @@ /** * One ParseTokens object is returned for each possible interpretation of a chemical name * If none of the name can be interpreted this list will be empty - * @return + * @return List of possible tokenisations/annotation of tokens */ public List getParseTokensList() { return parseTokensList; @@ -31,7 +31,7 @@ /** * The substring of the name that could not be classified into a substituent/full/functionalTerm * e.g. in ethyl-2H-fooarene "2H-fooarene" will be returned - * @return + * @return String of uninterpetable chemical name */ public String getUninterpretableName() { return uninterpretableName; @@ -41,7 +41,7 @@ * The substring of the name that could not be tokenised at all. 
* This will always be the same or shorter than the uninterpetable substring of name * e.g. in ethyl-2H-fooarene "fooarene" will be returned - * @return + * @return String of unparseable chemical name */ public String getUnparseableName() { return unparseableName; diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/PreProcessingException.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/PreProcessingException.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/PreProcessingException.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/PreProcessingException.java 2017-07-23 20:55:18.000000000 +0000 @@ -11,22 +11,18 @@ PreProcessingException() { super(); - // TODO Auto-generated constructor stub } PreProcessingException(String message) { super(message); - // TODO Auto-generated constructor stub } PreProcessingException(String message, Throwable cause) { super(message, cause); - // TODO Auto-generated constructor stub } PreProcessingException(Throwable cause) { super(cause); - // TODO Auto-generated constructor stub } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/PreProcessor.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/PreProcessor.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/PreProcessor.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/PreProcessor.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,44 +1,50 @@ package uk.ac.cam.ch.wwmm.opsin; import java.util.HashMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.util.Locale; +import java.util.Map; /** * Takes a name: * strips leading/trailing white space - * rejects a few special cases * Normalises representation of greeks and some other characters * @author dl387 * */ class PreProcessor { - private static final Pattern MATCH_DOLLAR = 
Pattern.compile("\\$"); - private static final Pattern MATCH_SULPH = Pattern.compile("sulph", Pattern.CASE_INSENSITIVE); - private static final Pattern MATCH_DOT_GREEK_DOT = Pattern.compile("\\.(alpha|beta|gamma|delta|epsilon|zeta|eta|lambda|omega)\\.", Pattern.CASE_INSENSITIVE); - private static final Pattern MATCH_HTML_ENTITY_GREEK = Pattern.compile("&(alpha|beta|gamma|delta|epsilon|zeta|eta|lambda|omega);", Pattern.CASE_INSENSITIVE); - private static final HashMap GREEK_MAP = new HashMap(); + private static final Map DOTENCLOSED_TO_DESIRED = new HashMap(); + private static final Map XMLENTITY_TO_DESIRED = new HashMap(); static { - GREEK_MAP.put("a", "alpha"); - GREEK_MAP.put("b", "beta"); - GREEK_MAP.put("g", "gamma"); - GREEK_MAP.put("d", "delta"); - GREEK_MAP.put("e", "epsilon"); -// letterGreekMap.put("z", "zeta"); -// letterGreekMap.put("i", "iota"); -// letterGreekMap.put("k", "kappa"); - GREEK_MAP.put("l", "lambda"); -// letterGreekMap.put("m", "mu"); -// letterGreekMap.put("n", "nu"); -// letterGreekMap.put("x", "xi"); -// letterGreekMap.put("p", "pi"); -// letterGreekMap.put("r", "rho"); -// letterGreekMap.put("s", "sigma"); -// letterGreekMap.put("t", "tau"); -// letterGreekMap.put("u", "upsilon"); -// letterGreekMap.put("f", "phi"); -// letterGreekMap.put("o", "omega"); + DOTENCLOSED_TO_DESIRED.put("a", "alpha"); + DOTENCLOSED_TO_DESIRED.put("b", "beta"); + DOTENCLOSED_TO_DESIRED.put("g", "gamma"); + DOTENCLOSED_TO_DESIRED.put("d", "delta"); + DOTENCLOSED_TO_DESIRED.put("e", "epsilon"); + DOTENCLOSED_TO_DESIRED.put("l", "lambda"); + DOTENCLOSED_TO_DESIRED.put("x", "xi"); + DOTENCLOSED_TO_DESIRED.put("alpha", "alpha"); + DOTENCLOSED_TO_DESIRED.put("beta", "beta"); + DOTENCLOSED_TO_DESIRED.put("gamma", "gamma"); + DOTENCLOSED_TO_DESIRED.put("delta", "delta"); + DOTENCLOSED_TO_DESIRED.put("epsilon", "epsilon"); + DOTENCLOSED_TO_DESIRED.put("zeta", "zeta"); + DOTENCLOSED_TO_DESIRED.put("eta", "eta"); + DOTENCLOSED_TO_DESIRED.put("lambda", "lambda"); + 
DOTENCLOSED_TO_DESIRED.put("xi", "xi"); + DOTENCLOSED_TO_DESIRED.put("omega", "omega"); + DOTENCLOSED_TO_DESIRED.put("fwdarw", "->"); + + XMLENTITY_TO_DESIRED.put("alpha", "alpha"); + XMLENTITY_TO_DESIRED.put("beta", "beta"); + XMLENTITY_TO_DESIRED.put("gamma", "gamma"); + XMLENTITY_TO_DESIRED.put("delta", "delta"); + XMLENTITY_TO_DESIRED.put("epsilon", "epsilon"); + XMLENTITY_TO_DESIRED.put("zeta", "zeta"); + XMLENTITY_TO_DESIRED.put("eta", "eta"); + XMLENTITY_TO_DESIRED.put("lambda", "lambda"); + XMLENTITY_TO_DESIRED.put("xi", "xi"); + XMLENTITY_TO_DESIRED.put("omega", "omega"); } /** @@ -48,57 +54,120 @@ * @throws PreProcessingException */ static String preProcess(String chemicalName) throws PreProcessingException { - chemicalName=chemicalName.trim();//remove leading and trailing whitespace - if ("".equals(chemicalName)){ + chemicalName = chemicalName.trim();//remove leading and trailing whitespace + if (chemicalName.length() == 0){ throw new PreProcessingException("Input chemical name was blank!"); } - chemicalName = processDollarPrefixedGreeks(chemicalName); - chemicalName = processDotSurroundedGreeks(chemicalName); - chemicalName = processHtmlEntityGreeks(chemicalName); + + chemicalName = performMultiCharacterReplacements(chemicalName); chemicalName = StringTools.convertNonAsciiAndNormaliseRepresentation(chemicalName); - chemicalName = MATCH_SULPH.matcher(chemicalName).replaceAll("sulf");//correct British spelling to the IUPAC spelling return chemicalName; } - private static String processDollarPrefixedGreeks(String chemicalName) { - Matcher m = MATCH_DOLLAR.matcher(chemicalName); - while (m.find()){ - if (chemicalName.length()>m.end()){ - String letter = chemicalName.substring(m.end(), m.end()+1).toLowerCase(); - if (GREEK_MAP.containsKey(letter)){ - chemicalName = chemicalName.substring(0, m.end()-1) +GREEK_MAP.get(letter) + chemicalName.substring(m.end()+1); - m = MATCH_DOLLAR.matcher(chemicalName); + private static String 
performMultiCharacterReplacements(String chemicalName) { + StringBuilder sb = new StringBuilder(chemicalName.length()); + for (int i = 0, nameLength = chemicalName.length(); i < nameLength; i++) { + char ch = chemicalName.charAt(i); + switch (ch) { + case '$': + if (i + 1 < nameLength){ + char letter = chemicalName.charAt(i + 1); + String replacement = getReplacementForDollarGreek(letter); + if (replacement != null){ + sb.append(replacement); + i++; + break; + } + } + sb.append(ch); + break; + case '.': + //e.g. .alpha. + String dotEnclosedString = getLowerCasedDotEnclosedString(chemicalName, i); + String dotEnclosedReplacement = DOTENCLOSED_TO_DESIRED.get(dotEnclosedString); + if (dotEnclosedReplacement != null){ + sb.append(dotEnclosedReplacement); + i = i + dotEnclosedString.length() + 1; + break; + } + sb.append(ch); + break; + case '&': + { + //e.g. α + String xmlEntityString = getLowerCasedXmlEntityString(chemicalName, i); + String xmlEntityReplacement = XMLENTITY_TO_DESIRED.get(xmlEntityString); + if (xmlEntityReplacement != null){ + sb.append(xmlEntityReplacement); + i = i + xmlEntityReplacement.length() + 1; + break; } + sb.append(ch); + break; + } + case 's': + case 'S'://correct British spelling to the IUPAC spelling + if (chemicalName.regionMatches(true, i + 1, "ulph", 0, 4)){ + sb.append("sulf"); + i = i + 4; + break; + } + sb.append(ch); + break; + default: + sb.append(ch); } } - return chemicalName; + return sb.toString(); + } + + private static String getLowerCasedDotEnclosedString(String chemicalName, int indexOfFirstDot) { + int end = -1; + int limit = Math.min(indexOfFirstDot + 9, chemicalName.length()); + for (int j = indexOfFirstDot + 1; j < limit; j++) { + if (chemicalName.charAt(j) == '.'){ + end = j; + break; + } + } + if (end > 0){ + return chemicalName.substring(indexOfFirstDot + 1, end).toLowerCase(Locale.ROOT); + } + return null; } - /** - * Removes dots around greek letters e.g. .alpha. 
-->alpha - * @param chemicalName - * @return - */ - private static String processDotSurroundedGreeks(String chemicalName) { - Matcher m = MATCH_DOT_GREEK_DOT.matcher(chemicalName); - while (m.find()){ - chemicalName = chemicalName.substring(0, m.start()) + m.group(1) + chemicalName.substring(m.end()); - m = MATCH_DOT_GREEK_DOT.matcher(chemicalName); + private static String getLowerCasedXmlEntityString(String chemicalName, int indexOfAmpersand) { + int end = -1; + int limit = Math.min(indexOfAmpersand + 9, chemicalName.length()); + for (int j = indexOfAmpersand + 1; j < limit; j++) { + if (chemicalName.charAt(j) == ';'){ + end = j; + break; + } } - return chemicalName; + if (end > 0){ + return chemicalName.substring(indexOfAmpersand + 1, end).toLowerCase(Locale.ROOT); + } + return null; } - /** - * Removes HTML entity escaping e.g. α -->alpha - * @param chemicalName - * @return - */ - private static String processHtmlEntityGreeks(String chemicalName) { - Matcher m = MATCH_HTML_ENTITY_GREEK.matcher(chemicalName); - while (m.find()){ - chemicalName = chemicalName.substring(0, m.start()) + m.group(1) + chemicalName.substring(m.end()); - m = MATCH_HTML_ENTITY_GREEK.matcher(chemicalName); + private static String getReplacementForDollarGreek(char ch) { + switch (ch) { + case 'a' : + return "alpha"; + case 'b' : + return "beta"; + case 'g' : + return "gamma"; + case 'd' : + return "delta"; + case 'e' : + return "epsilon"; + case 'l' : + return "lambda"; + default: + return null; } - return chemicalName; } + } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ResourceGetter.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ResourceGetter.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ResourceGetter.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ResourceGetter.java 2017-07-23 20:55:18.000000000 +0000 @@ -10,33 +10,36 @@ import 
java.io.UnsupportedEncodingException; import java.net.URL; +import javax.xml.stream.XMLInputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; + import org.apache.commons.io.IOUtils; -import org.xml.sax.SAXException; -import org.xml.sax.SAXNotRecognizedException; -import org.xml.sax.SAXNotSupportedException; -import org.xml.sax.XMLReader; -import org.xml.sax.helpers.XMLReaderFactory; - -import nu.xom.Builder; -import nu.xom.Document; -import nu.xom.ParsingException; -import nu.xom.ValidityException; +import org.codehaus.stax2.XMLInputFactory2; + +import com.ctc.wstx.stax.WstxInputFactory; /** * Handles I/O: * Gets resource files from packages which is useful for including data from the JAR file. * Provides OutputStreams for the serialisation of automata. - * This class has its roots in the resourceGetter in OSCAR. * * @author ptc24 * @author dl387 * */ class ResourceGetter { + + private static final XMLInputFactory xmlInputFactory; private final String resourcePath; - private String workingDirectory; - private final Builder xomBuilder; + private final String workingDirectory; + + static { + xmlInputFactory = new WstxInputFactory(); + xmlInputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); + xmlInputFactory.setProperty(XMLInputFactory2.P_AUTO_CLOSE_INPUT, true); + } /** * Sets up a resourceGetter to get resources from a particular path. 
@@ -50,39 +53,32 @@ resourcePath = resourcePath.substring(1); } this.resourcePath = resourcePath; + String workingDirectory; try { - workingDirectory =new File(".").getCanonicalPath();//works on linux unlike using the system property + workingDirectory = new File(".").getCanonicalPath();//works on linux unlike using the system property } catch (IOException e) { //Automata will not be serialisable workingDirectory = null; } - - XMLReader xmlReader; - try{ - xmlReader = XMLReaderFactory.createXMLReader(); - } - catch (SAXException e) { - throw new RuntimeException("No XML Reader could be initialised!", e); - } - try{ - xmlReader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); - } - catch (SAXNotSupportedException e) { - throw new RuntimeException("Your system's default XML Reader does not support disabling DTD loading! Maybe try updating your version of java?", e); - } catch (SAXNotRecognizedException e) { - throw new RuntimeException("Your system's default XML Reader has not recognised the DTD loading feature! Maybe try updating your version of java?", e); - } - xomBuilder = new Builder(xmlReader); + this.workingDirectory = workingDirectory; } - + + /** + * Gets the resourcePath used to initialise this ResourceGetter + * @return + */ + String getResourcePath() { + return resourcePath; + } + /**Fetches a data file from resourcePath, - * and parses it to an XML Document. + * and returns an XML stream reader for it * * @param name The name of the file to parse. - * @return The parsed document. 
+ * @return An XMLStreamReader * @throws IOException */ - Document getXMLDocument(String name) throws IOException { + XMLStreamReader getXMLStreamReader(String name) throws IOException { if(name == null){ throw new IllegalArgumentException("Input to function was null"); } @@ -90,7 +86,7 @@ if (workingDirectory != null){ File f = getFile(name); if(f != null) { - return xomBuilder.build(f); + return xmlInputFactory.createXMLStreamReader(new FileInputStream(f)); } } ClassLoader l = getClass().getClassLoader(); @@ -98,24 +94,15 @@ if (url == null){ throw new IOException("URL for resource: " + resourcePath + name + " is invalid"); } - return xomBuilder.build(url.openStream()); - } catch (ValidityException e) { - IOException ioe = new IOException("Validity exception occurred while reading the XML file with name:" +name); - ioe.initCause(e); - throw ioe; - } catch (ParsingException e) { - IOException ioe = new IOException("Parsing exception occurred while reading the XML file with name:" +name); - ioe.initCause(e); - throw ioe; + return xmlInputFactory.createXMLStreamReader(url.openStream()); + } catch (XMLStreamException e) { + throw new IOException("Validity exception occurred while reading the XML file with name:" +name, e); } } private File getFile(String name) { File f = new File(getResDir(), name); - if(f.isDirectory()){ - return null; - } - if(f.exists()){ + if(f.isFile()){ return f; } return null; diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ResourceManager.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ResourceManager.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ResourceManager.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ResourceManager.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,19 +1,18 @@ package uk.ac.cam.ch.wwmm.opsin; import java.io.IOException; -import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; 
-import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; -import dk.brics.automaton.RunAutomaton; +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; -import nu.xom.Document; -import nu.xom.Element; -import nu.xom.Elements; +import dk.brics.automaton.RunAutomaton; +import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; /**Holds all of the tokens used in parsing of chemical names. * Holds all automata @@ -24,36 +23,40 @@ * */ class ResourceManager { + private static final TokenEl IGNORE_WHEN_WRITING_PARSE_TREE = new TokenEl(""); /**Used to load XML files.*/ private final ResourceGetter resourceGetter; + /**Used to serialise and deserialise automata.*/ + private final AutomatonInitialiser automatonInitialiser; + /**A mapping between primitive tokens, and annotation->Token object mappings.*/ - final HashMap> tokenDict = new HashMap>(); + private final HashMap> tokenDict = new HashMap>(); /**A mapping between regex tokens, and annotation->Token object mappings.*/ - final HashMap reSymbolTokenDict = new HashMap(); + private final HashMap reSymbolTokenDict = new HashMap(); /**A mapping between annotation symbols and a trie of tokens.*/ - final OpsinRadixTrie[] symbolTokenNamesDict; + private final OpsinRadixTrie[] symbolTokenNamesDict; /**A mapping between annotation symbols and DFAs (annotation->automata mapping).*/ - final List[] symbolRegexAutomataDict; + private final RunAutomaton[] symbolRegexAutomataDict; /**A mapping between annotation symbols and regex patterns (annotation->regex pattern mapping).*/ - final List[] symbolRegexesDict; + private final Pattern[] symbolRegexesDict; /**The automaton which describes the grammar of a chemical name from left to right*/ - final RunAutomaton chemicalAutomaton; + private final RunAutomaton chemicalAutomaton; /**As symbolTokenNamesDict but the tokens are reversed*/ - OpsinRadixTrie[] 
symbolTokenNamesDictReversed; + private OpsinRadixTrie[] symbolTokenNamesDictReversed; /**As symbolRegexAutomataDict but automata are reversed */ - List[] symbolRegexAutomataDictReversed; + private RunAutomaton[] symbolRegexAutomataDictReversed; /**As symbolRegexesDict but regexes match the end of string */ - List[] symbolRegexesDictReversed; + private Pattern[] symbolRegexesDictReversed; /**The automaton which describes the grammar of a chemical name from right to left*/ - RunAutomaton reverseChemicalAutomaton; + private RunAutomaton reverseChemicalAutomaton; /**Generates the ResourceManager. * This involves reading in the token files, the regexToken file (regexTokens.xml) and the grammar file (regexes.xml). @@ -63,200 +66,456 @@ * @param resourceGetter * @throws IOException */ - @SuppressWarnings("unchecked") - ResourceManager(ResourceGetter resourceGetter) throws IOException{ + ResourceManager(ResourceGetter resourceGetter) throws IOException { this.resourceGetter = resourceGetter; + this.automatonInitialiser = new AutomatonInitialiser(resourceGetter.getResourcePath() + "serialisedAutomata/"); chemicalAutomaton = processChemicalGrammar(false); int grammarSymbolsSize = chemicalAutomaton.getCharIntervals().length; symbolTokenNamesDict = new OpsinRadixTrie[grammarSymbolsSize]; - symbolRegexAutomataDict = new List[grammarSymbolsSize]; - symbolRegexesDict = new List[grammarSymbolsSize]; + symbolRegexAutomataDict = new RunAutomaton[grammarSymbolsSize]; + symbolRegexesDict = new Pattern[grammarSymbolsSize]; processTokenFiles(false); processRegexTokenFiles(false); } /** * Processes tokenFiles - * @param reversed Should the hashing of + * @param reversed Should the tokens be reversed * @throws IOException */ - private void processTokenFiles(boolean reversed) throws IOException{ - Document tokenFiles = resourceGetter.getXMLDocument("index.xml"); - Elements files = tokenFiles.getRootElement().getChildElements("tokenFile"); - for(int i=0;i tokenLists; - if 
(rootElement.getLocalName().equals("tokenLists")){//support for xml files with one "tokenList" or multiple "tokenList" under a "tokenLists" element - tokenLists = XOMTools.getChildElementsWithTagName(rootElement, "tokenList"); + private void processTokenFiles(boolean reversed) throws IOException { + XMLStreamReader filesToProcessReader = resourceGetter.getXMLStreamReader("index.xml"); + try { + while (filesToProcessReader.hasNext()) { + int event = filesToProcessReader.next(); + if (event == XMLStreamConstants.START_ELEMENT && + filesToProcessReader.getLocalName().equals("tokenFile")) { + String fileName = filesToProcessReader.getElementText(); + processTokenFile(fileName, reversed); + } } - else{ - tokenLists =new ArrayList(); - tokenLists.add(rootElement); + } + catch (XMLStreamException e) { + throw new IOException("Parsing exception occurred while reading index.xml", e); + } + finally { + try { + filesToProcessReader.close(); + } catch (XMLStreamException e) { + throw new IOException("Parsing exception occurred while reading index.xml", e); } - for (Element tokenList : tokenLists) { - char symbol = tokenList.getAttributeValue("symbol").charAt(0); - List tokenElements = XOMTools.getChildElementsWithTagName(tokenList, "token"); - int index = Arrays.binarySearch(chemicalAutomaton.getCharIntervals(), symbol); - if (index < 0){ - throw new RuntimeException(symbol +" is associated with a tokenList of tagname " + tokenList.getAttributeValue("tagname") +" however it is not actually used in OPSIN's grammar!!!"); + } + } + + private void processTokenFile(String fileName, boolean reversed) throws IOException { + XMLStreamReader reader = resourceGetter.getXMLStreamReader(fileName); + try { + while (reader.hasNext()) { + if (reader.next() == XMLStreamConstants.START_ELEMENT) { + String tagName = reader.getLocalName(); + if (tagName.equals("tokenLists")) { + while (reader.hasNext()) { + switch (reader.next()) { + case XMLStreamConstants.START_ELEMENT: + if 
(reader.getLocalName().equals("tokenList")) { + processTokenList(reader, reversed); + } + break; + } + } + } + else if (tagName.equals("tokenList")) { + processTokenList(reader, reversed); + } } - for (Element tokenElement : tokenElements) { - String t = tokenElement.getValue(); + } + } + catch (XMLStreamException e) { + throw new IOException("Parsing exception occurred while reading " + fileName, e); + } + finally { + try { + reader.close(); + } catch (XMLStreamException e) { + throw new IOException("Parsing exception occurred while reading " + fileName, e); + } + } + } - if(!tokenDict.containsKey(t)) { - tokenDict.put(t, new HashMap()); + private void processTokenList(XMLStreamReader reader, boolean reversed) throws XMLStreamException { + String tokenTagName = null; + Character symbol = null; + String type = null; + String subType = null; + boolean ignoreWhenWritingXML = false; + + for (int i = 0, l = reader.getAttributeCount(); i < l; i++) { + String atrName = reader.getAttributeLocalName(i); + String atrValue = reader.getAttributeValue(i); + if (atrName.equals("tagname")){ + tokenTagName = atrValue; + } + else if (atrName.equals("symbol")){ + symbol = atrValue.charAt(0); + } + else if (atrName.equals(TYPE_ATR)){ + type = atrValue; + } + else if (atrName.equals(SUBTYPE_ATR)){ + subType = atrValue; + } + else if (atrName.equals("ignoreWhenWritingXML")){ + ignoreWhenWritingXML = atrValue.equals("yes"); + } + else{ + throw new RuntimeException("Malformed tokenlist"); + } + } + if (tokenTagName == null || symbol == null) { + throw new RuntimeException("Malformed tokenlist"); + } + + int index = Arrays.binarySearch(chemicalAutomaton.getCharIntervals(), symbol); + if (index < 0) { + throw new RuntimeException(symbol +" is associated with a tokenList of tagname " + tokenTagName +" however it is not actually used in OPSIN's grammar!!!"); + } + + while (reader.hasNext()) { + switch (reader.next()) { + case XMLStreamConstants.START_ELEMENT: + if 
(reader.getLocalName().equals("token")) { + TokenEl el; + if (ignoreWhenWritingXML) { + el = IGNORE_WHEN_WRITING_PARSE_TREE; } - tokenDict.get(t).put(symbol, new Token(tokenElement, tokenList)); - if (!reversed){ - if(symbolTokenNamesDict[index]==null) { - symbolTokenNamesDict[index] = new OpsinRadixTrie(); + else{ + el = new TokenEl(tokenTagName); + if (type != null) { + el.addAttribute(TYPE_ATR, type); + } + if (subType != null) { + el.addAttribute(SUBTYPE_ATR, subType); + } + for (int i = 0, l = reader.getAttributeCount(); i < l; i++) { + el.addAttribute(reader.getAttributeLocalName(i), reader.getAttributeValue(i)); } - symbolTokenNamesDict[index].addToken(t); } - else{ - if(symbolTokenNamesDictReversed[index]==null) { - symbolTokenNamesDictReversed[index] = new OpsinRadixTrie(); + String text = reader.getElementText(); + StringBuilder sb = new StringBuilder(text.length()); + for (int i = 0, len = text.length(); i < len; i++) { + char ch = text.charAt(i); + if (ch == '\\') { + if (i + 1 >= len) { + throw new RuntimeException("Malformed token text: " + text); + } + ch = text.charAt(++i); } - symbolTokenNamesDictReversed[index].addToken(new StringBuffer(t).reverse().toString()); + else if (ch == '|') { + addToken(sb.toString(), el, symbol, index, reversed); + sb.setLength(0); + continue; + } + sb.append(ch); } + addToken(sb.toString(), el, symbol, index, reversed); + } + break; + case XMLStreamConstants.END_ELEMENT: + if (reader.getLocalName().equals("tokenList")) { + return; } + break; } } } + private void addToken(String text, TokenEl el, Character symbol, int index, boolean reversed) { + Map symbolToToken = tokenDict.get(text); + if(symbolToToken == null) { + symbolToToken = new HashMap(); + tokenDict.put(text, symbolToToken); + } + symbolToToken.put(symbol, el); + + if (!reversed){ + OpsinRadixTrie trie = symbolTokenNamesDict[index]; + if(trie == null) { + trie = new OpsinRadixTrie(); + symbolTokenNamesDict[index] = trie; + } + trie.addToken(text); + } + else{ 
+ OpsinRadixTrie trie = symbolTokenNamesDictReversed[index]; + if(trie == null) { + trie = new OpsinRadixTrie(); + symbolTokenNamesDictReversed[index] = trie; + } + trie.addToken(new StringBuilder(text).reverse().toString()); + } + } + private void processRegexTokenFiles(boolean reversed) throws IOException{ - Element reTokenList = resourceGetter.getXMLDocument("regexTokens.xml").getRootElement(); - Elements regexEls = reTokenList.getChildElements(); - - HashMap tempRegexes = new HashMap(); + XMLStreamReader reader = resourceGetter.getXMLStreamReader("regexTokens.xml"); + Map tempRegexes = new HashMap(); Pattern matchRegexReplacement = Pattern.compile("%.*?%"); - for(int i=0;i(); - } - symbolRegexAutomataDict[index].add(AutomatonInitialiser.loadAutomaton(regexEl.getAttributeValue("tagname")+"_"+(int)symbol, newValueSB.toString(), false, false)); - } - else{ - if(symbolRegexesDict[index]==null) { - symbolRegexesDict[index] = new ArrayList(); - } - symbolRegexesDict[index].add(Pattern.compile(newValueSB.toString())); - } + private void addRegexToken(XMLStreamReader reader, String regex, boolean reversed) { + String tokenTagName = null; + Character symbol = null; + String type = null; + String subType = null; + String value = null; + boolean determinise = false; + boolean ignoreWhenWritingXML = false; + + for (int i = 0, l = reader.getAttributeCount(); i < l; i++) { + String atrName = reader.getAttributeLocalName(i); + String atrValue = reader.getAttributeValue(i); + if (atrName.equals("tagname")){ + tokenTagName = atrValue; + } + else if (atrName.equals("symbol")){ + symbol = atrValue.charAt(0); + } + else if (atrName.equals(TYPE_ATR)){ + type = atrValue; + } + else if (atrName.equals(SUBTYPE_ATR)){ + subType = atrValue; + } + else if (atrName.equals("value")){ + value = atrValue; + } + else if (atrName.equals("determinise")){ + determinise = atrValue.equals("yes"); + } + else if (atrName.equals("ignoreWhenWritingXML")){ + ignoreWhenWritingXML = 
atrValue.equals("yes"); + } + else if (!atrName.equals("regex")){ + throw new RuntimeException("Malformed regexToken"); + } + } + if (tokenTagName == null || symbol == null) { + throw new RuntimeException("Malformed regexToken"); + } + + if (!reversed) { + //reSymbolTokenDict will be populated when the constructor is called for left-right parsing, hence skip for right-left + if (reSymbolTokenDict.get(symbol) != null) { + throw new RuntimeException(symbol +" is associated with multiple regular expressions. The following expression clashes: " + regex +" This should be resolved by combining regular expressions that map the same symbol" ); + } + + if (ignoreWhenWritingXML) { + reSymbolTokenDict.put(symbol, IGNORE_WHEN_WRITING_PARSE_TREE); } else{ - if (regexEl.getAttribute("determinise")!=null){//should the regex be compiled into a DFA for faster execution? - if(symbolRegexAutomataDictReversed[index]==null) { - symbolRegexAutomataDictReversed[index] = new ArrayList(); - } - symbolRegexAutomataDictReversed[index].add(AutomatonInitialiser.loadAutomaton(regexEl.getAttributeValue("tagname")+"_"+(int)symbol, newValueSB.toString(), false, true)); + TokenEl el = new TokenEl(tokenTagName); + if (type != null){ + el.addAttribute(TYPE_ATR, type); } - else{ - if(symbolRegexesDictReversed[index]==null) { - symbolRegexesDictReversed[index] = new ArrayList(); - } - symbolRegexesDictReversed[index].add(Pattern.compile(newValueSB.toString() +"$")); + if (subType != null){ + el.addAttribute(SUBTYPE_ATR, subType); } + if (value != null){ + el.addAttribute(VALUE_ATR, value); + } + reSymbolTokenDict.put(symbol, el); + } + } + + int index = Arrays.binarySearch(chemicalAutomaton.getCharIntervals(), symbol); + if (index < 0){ + throw new RuntimeException(symbol +" is associated with the regex " + regex +" however it is not actually used in OPSIN's grammar!!!"); + } + if (!reversed){ + if (determinise){//should the regex be compiled into a DFA for faster execution? 
+ symbolRegexAutomataDict[index] = automatonInitialiser.loadAutomaton(tokenTagName + "_" + (int)symbol, regex, false, false); + } + else{ + symbolRegexesDict[index] = Pattern.compile(regex); + } + } + else{ + if (determinise){//should the regex be compiled into a DFA for faster execution? + symbolRegexAutomataDictReversed[index] = automatonInitialiser.loadAutomaton(tokenTagName + "_" + (int)symbol, regex, false, true); + } + else{ + symbolRegexesDictReversed[index] = Pattern.compile(regex +"$"); } } } - - private RunAutomaton processChemicalGrammar(boolean reversed) throws IOException{ - Map regexDict = new HashMap(); - Elements regexes = resourceGetter.getXMLDocument("regexes.xml").getRootElement().getChildElements("regex"); + + private RunAutomaton processChemicalGrammar(boolean reversed) throws IOException { + XMLStreamReader reader = resourceGetter.getXMLStreamReader("regexes.xml"); + Map regexDict = new HashMap(); Pattern matchRegexReplacement = Pattern.compile("%.*?%"); - for(int i=0;i annotationToToken = tokenDict.get(tokenString); + if(annotationToToken != null){ + TokenEl token = annotationToToken.get(symbol); + if (token != null) { + if (token == IGNORE_WHEN_WRITING_PARSE_TREE){ + return null; + } + return token.copy(tokenString); + } } - if (reSymbolTokenDict.get(symbol)!=null){ - return reSymbolTokenDict.get(symbol).makeElement(token); + TokenEl regexToken = reSymbolTokenDict.get(symbol); + if (regexToken != null){ + if (regexToken == IGNORE_WHEN_WRITING_PARSE_TREE){ + return null; + } + return regexToken.copy(tokenString); } - throw new ParsingException("Parsing Error: This is a bug in the program. A token element could not be found for token: " + token +" using annotation symbol: " +symbol); + throw new ParsingException("Parsing Error: This is a bug in the program. 
A token element could not be found for token: " + tokenString +" using annotation symbol: " +symbol); + } + + RunAutomaton getChemicalAutomaton() { + return chemicalAutomaton; + } + + OpsinRadixTrie[] getSymbolTokenNamesDict() { + return symbolTokenNamesDict; + } + + RunAutomaton[] getSymbolRegexAutomataDict() { + return symbolRegexAutomataDict; + } + + Pattern[] getSymbolRegexesDict() { + return symbolRegexesDict; + } + + RunAutomaton getReverseChemicalAutomaton() { + return reverseChemicalAutomaton; } + + OpsinRadixTrie[] getSymbolTokenNamesDictReversed() { + return symbolTokenNamesDictReversed; + } + + RunAutomaton[] getSymbolRegexAutomataDictReversed() { + return symbolRegexAutomataDictReversed; + } + + Pattern[] getSymbolRegexesDictReversed() { + return symbolRegexesDictReversed; + } + } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ReverseParseRules.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ReverseParseRules.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ReverseParseRules.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ReverseParseRules.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,9 +1,8 @@ package uk.ac.cam.ch.wwmm.opsin; import java.io.IOException; +import java.util.ArrayDeque; import java.util.ArrayList; -import java.util.Collections; -import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -26,24 +25,14 @@ */ class ReverseParseRules { - /** A "struct" containing bits of state needed during finite-state parsing. */ - private static class AnnotatorState { - /** The current state of the DFA. */ - int state; - /** The annotation so far. */ - List annot; - /** The strings these annotations correspond to. 
*/ - ArrayList tokens; - /** The index of the first char in the chemical name that has yet to be tokenised */ - int posInName; - } - /** A DFA encompassing the grammar of a chemical word. */ - private RunAutomaton chemAutomaton; + private final RunAutomaton chemAutomaton; /** The allowed symbols in chemAutomaton */ - private char[] stateSymbols; - - private final ResourceManager resourceManager; + private final char[] stateSymbols; + + private final OpsinRadixTrie[] symbolTokenNamesDictReversed; + private final RunAutomaton[] symbolRegexAutomataDictReversed; + private final Pattern[] symbolRegexesDictReversed; /** * Creates a right to left parser that can parse a substituent/full/functional word @@ -51,10 +40,12 @@ * @throws IOException */ ReverseParseRules(ResourceManager resourceManager) throws IOException{ - this.resourceManager = resourceManager; resourceManager.populatedReverseTokenMappings(); - chemAutomaton = resourceManager.reverseChemicalAutomaton; - stateSymbols = chemAutomaton.getCharIntervals(); + this.chemAutomaton = resourceManager.getReverseChemicalAutomaton(); + this.symbolTokenNamesDictReversed = resourceManager.getSymbolTokenNamesDictReversed(); + this.symbolRegexAutomataDictReversed = resourceManager.getSymbolRegexAutomataDictReversed(); + this.symbolRegexesDictReversed = resourceManager.getSymbolRegexesDictReversed(); + this.stateSymbols = chemAutomaton.getCharIntervals(); } /**Determines the possible annotations for a chemical word @@ -68,97 +59,70 @@ * @throws ParsingException */ public ParseRulesResults getParses(String chemicalWord) throws ParsingException { - String chemicalWordLowerCase = chemicalWord.toLowerCase(); - AnnotatorState startingAS = new AnnotatorState(); - startingAS.state = chemAutomaton.getInitialState(); - startingAS.annot = new ArrayList(); - startingAS.tokens = new ArrayList(); - startingAS.posInName = chemicalWord.length(); - LinkedList asStack = new LinkedList(); - asStack.add(startingAS); + AnnotatorState initialState 
= new AnnotatorState(chemAutomaton.getInitialState(), '\0', chemicalWord.length(), true, null); + String chemicalWordLowerCase = StringTools.lowerCaseAsciiString(chemicalWord); + ArrayDeque asStack = new ArrayDeque(); + asStack.add(initialState); int posInNameOfLastSuccessfulAnnotations = chemicalWord.length(); List successfulAnnotations = new ArrayList(); - AnnotatorState longestAnnotation = new AnnotatorState();//this is the longest annotation. It does not necessarily end in an accept state - longestAnnotation.state = chemAutomaton.getInitialState(); - longestAnnotation.annot = new ArrayList(); - longestAnnotation.tokens = new ArrayList(); - longestAnnotation.posInName = chemicalWord.length(); + AnnotatorState longestAnnotation = initialState;//this is the longest annotation. It does not necessarily end in an accept state int stateSymbolsSize = stateSymbols.length; while (!asStack.isEmpty()) { AnnotatorState as = asStack.removeFirst(); - int posInName = as.posInName; - if (chemAutomaton.isAccept(as.state)){ + int posInName = as.getPosInName(); + if (chemAutomaton.isAccept(as.getState())){ if (posInName <= posInNameOfLastSuccessfulAnnotations){//this annotation is worthy of consideration if (posInName < posInNameOfLastSuccessfulAnnotations){//this annotation is longer than any previously found annotation successfulAnnotations.clear(); posInNameOfLastSuccessfulAnnotations = posInName; } - else if (successfulAnnotations.size()>128){ + else if (successfulAnnotations.size() > 128){ throw new ParsingException("Ambiguity in OPSIN's chemical grammar has produced more than 128 annotations. Parsing has been aborted. 
Please report this as a bug"); } successfulAnnotations.add(as); } } //record the longest annotation found so it can be reported to the user for debugging - if (posInName < longestAnnotation.posInName){ + if (posInName < longestAnnotation.getPosInName()){ longestAnnotation = as; } for (int i = 0; i < stateSymbolsSize; i++) { char annotationCharacter = stateSymbols[i]; - int potentialNextState = chemAutomaton.step(as.state, annotationCharacter); + int potentialNextState = chemAutomaton.step(as.getState(), annotationCharacter); if (potentialNextState != -1) {//-1 means this state is not accessible from the previous state - OpsinRadixTrie possibleTokenisationsTrie = resourceManager.symbolTokenNamesDictReversed[i]; + OpsinRadixTrie possibleTokenisationsTrie = symbolTokenNamesDictReversed[i]; if (possibleTokenisationsTrie != null) { List possibleTokenisations = possibleTokenisationsTrie.findMatchesReadingStringRightToLeft(chemicalWordLowerCase, posInName); if (possibleTokenisations != null) {//next could be a token - for (int tokenizationIndex : possibleTokenisations) { - AnnotatorState newAs = new AnnotatorState(); - newAs.posInName = tokenizationIndex; - newAs.tokens = new ArrayList(as.tokens); - newAs.tokens.add(chemicalWordLowerCase.substring(tokenizationIndex, posInName)); - newAs.annot = new ArrayList(as.annot); - newAs.annot.add(annotationCharacter); - newAs.state = potentialNextState; - //System.out.println("tokened " + chemicalWordLowerCase.substring(tokenizationIndex, posInName))); + for (int j = 0, l = possibleTokenisations.size(); j < l; j++) {//typically list size will be 1 so this is faster than an iterator + int tokenizationIndex = possibleTokenisations.get(j); + AnnotatorState newAs = new AnnotatorState(potentialNextState, annotationCharacter, tokenizationIndex, false, as); + //System.out.println("tokened " + chemicalWordLowerCase.substring(tokenizationIndex, posInName)); asStack.add(newAs); } } } - List possibleAutomata = 
resourceManager.symbolRegexAutomataDictReversed[i]; + RunAutomaton possibleAutomata = symbolRegexAutomataDictReversed[i]; if (possibleAutomata != null) {//next could be an automaton - for (RunAutomaton automaton : possibleAutomata) { - int matchLength = runInReverse(automaton, chemicalWord, posInName); - if (matchLength != -1){//matchLength = -1 means it did not match - AnnotatorState newAs = new AnnotatorState(); - newAs.posInName = posInName - matchLength; - newAs.tokens = new ArrayList(as.tokens); - newAs.tokens.add(chemicalWord.substring(posInName - matchLength, posInName)); - newAs.annot = new ArrayList(as.annot); - newAs.annot.add(annotationCharacter); - newAs.state = potentialNextState; - //System.out.println("neword automata " + chemicalWord.substring(posInName - matchLength, posInName)); - asStack.add(newAs); - } + int matchLength = runInReverse(possibleAutomata, chemicalWord, posInName); + if (matchLength != -1){//matchLength = -1 means it did not match + int tokenizationIndex = posInName - matchLength; + AnnotatorState newAs = new AnnotatorState(potentialNextState, annotationCharacter, tokenizationIndex, true, as); + //System.out.println("neword automata " + chemicalWord.substring(tokenizationIndex, posInName)); + asStack.add(newAs); } } - List possibleRegexes = resourceManager.symbolRegexesDictReversed[i]; - if (possibleRegexes != null) {//next could be a regex - for (Pattern pattern : possibleRegexes) { - Matcher mat = pattern.matcher(chemicalWord).region(0, posInName); - if (mat.find()) {//match at end (patterns use $ anchor) - AnnotatorState newAs = new AnnotatorState(); - String matchedString = mat.group(0); - newAs.posInName = posInName - matchedString.length(); - newAs.tokens = new ArrayList(as.tokens); - newAs.tokens.add(matchedString); - newAs.annot = new ArrayList(as.annot); - newAs.annot.add(annotationCharacter); - newAs.state = potentialNextState; - //System.out.println("neword regex " + matchedString); - asStack.add(newAs); - } + Pattern 
possibleRegex = symbolRegexesDictReversed[i]; + if (possibleRegex != null) {//next could be a regex + Matcher mat = possibleRegex.matcher(chemicalWord).region(0, posInName); + mat.useTransparentBounds(true); + if (mat.find()) {//match at end (patterns use $ anchor) + int tokenizationIndex = posInName - mat.group(0).length(); + AnnotatorState newAs = new AnnotatorState(potentialNextState, annotationCharacter, tokenizationIndex, true, as); + //System.out.println("neword regex " + mat.group(0)); + asStack.add(newAs); } } } @@ -166,17 +130,15 @@ } List outputList = new ArrayList(); String uninterpretableName = chemicalWord; - String unparseableName = chemicalWord.substring(0, longestAnnotation.posInName); + String unparseableName = chemicalWord.substring(0, longestAnnotation.getPosInName()); if (successfulAnnotations.size() > 0){//at least some of the name could be interpreted into a substituent/full/functionalTerm int bestAcceptPosInName = -1; for(AnnotatorState as : successfulAnnotations) { - ParseTokens pt = new ParseTokens(as.tokens, as.annot); - outputList.add(pt); - bestAcceptPosInName = as.posInName;//all acceptable annotator states found should have the same posInName + outputList.add(convertAnnotationStateToParseTokens(as, chemicalWord, chemicalWordLowerCase)); + bestAcceptPosInName = as.getPosInName();//all acceptable annotator states found should have the same posInName } uninterpretableName = chemicalWord.substring(0, bestAcceptPosInName); } - outputList = inverseParseTokens(outputList); return new ParseRulesResults(outputList, uninterpretableName, unparseableName); } @@ -205,16 +167,21 @@ } return max; } - - private List inverseParseTokens(List outputList) { - List inversedParseTokens = new ArrayList(); - for (ParseTokens parseTokens : outputList) { - List annotations = new ArrayList(parseTokens.getAnnotations()); - List tokens = new ArrayList(parseTokens.getTokens()); - Collections.reverse(annotations); - Collections.reverse(tokens); - 
inversedParseTokens.add(new ParseTokens(tokens, annotations)); + + private ParseTokens convertAnnotationStateToParseTokens(AnnotatorState as, String chemicalWord, String chemicalWordLowerCase) { + List tokens = new ArrayList(); + List annotations = new ArrayList(); + AnnotatorState previousAs; + while ((previousAs = as.getPreviousAs()) != null) { + if (as.isCaseSensitive()) { + tokens.add(chemicalWord.substring(as.getPosInName(), previousAs.getPosInName())); + } + else{ + tokens.add(chemicalWordLowerCase.substring(as.getPosInName(), previousAs.getPosInName())); + } + annotations.add(as.getAnnot()); + as = previousAs; } - return inversedParseTokens; + return new ParseTokens(tokens, annotations); } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Ring.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Ring.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Ring.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Ring.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,7 +1,9 @@ package uk.ac.cam.ch.wwmm.opsin; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; /** * Class representing a single ring (i.e. 
NOT a fused ring which is formed from multiple rings) @@ -9,13 +11,12 @@ * */ class Ring { - private List atomList = new ArrayList(); - private List bondList; + private final List atomList = new ArrayList(); + private final List bondList; + private final Map bondToNeighbourRings = new LinkedHashMap(); + private List cyclicAtomList; private List cyclicBondList; - private List neighbours = new ArrayList(); - - private int nFusedBonds = 0; Ring(List bondList){ if (bondList==null || bondList.size()==0){ @@ -58,11 +59,7 @@ } int getNumberOfFusedBonds() { - return nFusedBonds; - } - - void incrementNumberOfFusedBonds() { - nFusedBonds++; + return bondToNeighbourRings.size(); } /** @@ -70,14 +67,7 @@ * @return List */ List getFusedBonds(){ - List bonds = new ArrayList(); - - for (Bond bond : bondList) { - if (bond.getFusedRings().size()>0) { - bonds.add(bond); - } - } - return bonds; + return new ArrayList(bondToNeighbourRings.keySet()); } int getBondIndex(Bond bond){ @@ -93,11 +83,18 @@ } List getNeighbours() { - return neighbours; + return new ArrayList(bondToNeighbourRings.values()); + } + + Ring getNeighbourOfFusedBond(Bond fusedBond) { + return bondToNeighbourRings.get(fusedBond); } - void addNeighbour(Ring ring) { - neighbours.add(ring); + void addNeighbour(Bond bond, Ring ring) { + if (this == ring) { + throw new IllegalArgumentException("Ring can't be a neighbour of itself"); + } + bondToNeighbourRings.put(bond, ring); } /** diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SMILESFragmentBuilder.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SMILESFragmentBuilder.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SMILESFragmentBuilder.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SMILESFragmentBuilder.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,9 +1,15 @@ package uk.ac.cam.ch.wwmm.opsin; -import java.util.*; - -import static 
uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; + +import java.util.ArrayDeque; +import java.util.Deque; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + import uk.ac.cam.ch.wwmm.opsin.Bond.SMILES_BOND_DIRECTION; import uk.ac.cam.ch.wwmm.opsin.BondStereo.BondStereoValue; @@ -48,10 +54,15 @@ private static class StackFrame { /**The Atom currently under consideration.*/ Atom atom; + /**The order of the bond about to be formed.*/ int bondOrder; + /**Whether the bond is a \ or / bond for use in determining cis/trans.*/ - SMILES_BOND_DIRECTION slash; + SMILES_BOND_DIRECTION slash = null; + + /**The index of a dummy atom in the atom's stereochemistry atomrefs4*/ + Integer indexOfDummyAtom = null; /**Creates a stack frame with given parameters. * @@ -61,7 +72,6 @@ StackFrame(Atom a, int bondOrderVal) { atom = a; bondOrder = bondOrderVal; - slash = null; } /**Creates a copy of an existing StackFrame. @@ -74,6 +84,9 @@ } } + /**Ring opening dummy atom, used as a placeholder in stereochemistry atomrefs4*/ + private static final Atom ringOpeningDummyAtom = new Atom(ChemEl.R); + /**Organic Atoms.*/ private static final Set organicAtoms = new HashSet(); /**Aromatic Atoms.*/ @@ -102,535 +115,668 @@ aromaticAtoms.add("sb"); aromaticAtoms.add("te"); } - - /**Build a Fragment based on a SMILES string, with a null type/subType. - * - * @param smiles The SMILES string to build from. - * @param fragManager - * @return The built fragment. - * @throws StructureBuildingException - */ - Fragment build(String smiles, FragmentManager fragManager) throws StructureBuildingException { - return build(smiles, "", "", "", fragManager); + + private final IDManager idManager; + + SMILESFragmentBuilder(IDManager idManager) { + this.idManager = idManager; } - /** - * Build a Fragment based on a SMILES string. - * @param smiles The SMILES string to build from. 
- * @param type The type of fragment being built. - * @param subType The subtype of fragment being built. - * @param labelMapping A string indicating which locants to assign to each atom. Can be a slash delimited list, "numeric", "fusedRing" or "none". A value of "" is treated as synonymous to numeric - * @param fragManager - * @return Fragment The built fragment. - * @throws StructureBuildingException - */ - Fragment build(String smiles, String type, String subType, String labelMapping, FragmentManager fragManager) throws StructureBuildingException { - if (smiles==null){ - throw new StructureBuildingException("SMILES specified is null"); - } - if (type==null){ - throw new StructureBuildingException("type specified is null, use \"\" if a type is not desired "); - } - if (subType==null){ - throw new StructureBuildingException("subType specified is null, use \"\" if a subType is not desired "); - } - if (labelMapping==null){ - throw new StructureBuildingException("labelMapping is null use \"none\" if you do not want any numbering or \"numeric\" if you would like default numbering"); - } - String[] labelMap = null; - if (labelMapping.equals("")){ - labelMapping = NUMERIC_LABELS_VAL; - } - if(!labelMapping.equals(NONE_LABELS_VAL) && !labelMapping.equals(FUSEDRING_LABELS_VAL) ) { - labelMap = MATCH_SLASH.split(labelMapping, -1);//place slash delimited labels into an array - } - int currentNumber = 1; - Fragment currentFrag = new Fragment(type, subType); - Stack stack = new Stack(); - stack.push(new StackFrame(null, 1)); - HashMap closures = new HashMap();//used for ring closures - String tmpString = smiles; - char firstCharacter =tmpString.charAt(0); - if(firstCharacter == '-' || firstCharacter == '=' || firstCharacter == '#') {//used by OPSIN to specify the valency with which this fragment connects - tmpString = tmpString.substring(1); - } - char lastCharacter =tmpString.charAt(tmpString.length()-1); - if(lastCharacter == '-' || lastCharacter == '=' || lastCharacter == 
'#') {//used by OPSIN to specify the valency with which this fragment connects and to indicate it connects via the last atom in the SMILES - tmpString = tmpString.substring(0, tmpString.length()-1); + private class ParserInstance { + private final Deque stack = new ArrayDeque(); + private final Map ringClosures = new HashMap(); + + private final String smiles; + private final int endOfSmiles; + private final Fragment fragment; + + private int i = 0; + + public ParserInstance(String smiles, Fragment fragment) { + this.smiles = smiles; + this.endOfSmiles = smiles.length(); + this.fragment = fragment; + } + + void parseSmiles() throws StructureBuildingException { + stack.add(new StackFrame(null, 1)); + for (; i < endOfSmiles; i++) { + char ch = smiles.charAt(i); + switch (ch) { + case '(': + stack.add(new StackFrame(stack.getLast())); + break; + case ')': + stack.removeLast(); + break; + case '-': + stack.getLast().bondOrder = 1; + break; + case '=': + if (stack.getLast().bondOrder != 1){ + throw new StructureBuildingException("= in unexpected position: bond order already defined!"); + } + stack.getLast().bondOrder = 2; + break; + case '#': + if (stack.getLast().bondOrder != 1){ + throw new StructureBuildingException("# in unexpected position: bond order already defined!"); + } + stack.getLast().bondOrder = 3; + break; + case '/': + if (stack.getLast().slash != null){ + throw new StructureBuildingException("/ in unexpected position: bond configuration already defined!"); + } + stack.getLast().slash = SMILES_BOND_DIRECTION.RSLASH; + break; + case '\\': + if (stack.getLast().slash != null){ + throw new StructureBuildingException("\\ in unexpected position: bond configuration already defined!"); + } + stack.getLast().slash = SMILES_BOND_DIRECTION.LSLASH; + break; + case '.': + stack.getLast().atom = null; + break; + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + 
case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '*': + processOrganicAtom(ch); + break; + case '[': + processBracketedAtom(); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '%': + processRingOpeningOrClosure(ch); + break; + default: + throw new StructureBuildingException(ch + " is in an unexpected position. Check this is not a mistake and that this feature of SMILES is supported by OPSIN's SMILES parser"); + } + } + if (!ringClosures.isEmpty()){ + throw new StructureBuildingException("Unmatched ring opening"); + } } - while(tmpString.length() > 0) { - Character nextChar = tmpString.charAt(0); - tmpString = tmpString.substring(1); - if(nextChar == '(') { - stack.push(new StackFrame(stack.peek())); - } else if(nextChar == ')') { - stack.pop(); - } else if(nextChar == '-'){ - stack.peek().bondOrder = 1; - } else if(nextChar == '='){ - if (stack.peek().bondOrder != 1){ - throw new StructureBuildingException("= in unexpected position: bond order already defined!"); - } - stack.peek().bondOrder = 2; - } else if(nextChar == '#'){ - if (stack.peek().bondOrder != 1){ - throw new StructureBuildingException("# in unexpected position: bond order already defined!"); - } - stack.peek().bondOrder = 3; - } else if(nextChar == '/'){ - if (stack.peek().slash!=null){ - throw new StructureBuildingException("/ in unexpected position: bond configuration already defined!"); - } - stack.peek().slash = SMILES_BOND_DIRECTION.RSLASH; - } else if(nextChar == '\\'){ - if 
(stack.peek().slash != null){ - throw new StructureBuildingException("\\ in unexpected position: bond configuration already defined!"); - } - stack.peek().slash = SMILES_BOND_DIRECTION.LSLASH; - } else if(nextChar == '.'){ - stack.peek().atom = null; - } else if(Character.isLetter(nextChar)) {//organic atoms - String elementType = String.valueOf(nextChar); - boolean spareValency =false; - if(Character.isUpperCase(nextChar)) {//normal atoms - if(tmpString.length() > 0 && Character.isLowerCase(tmpString.charAt(0)) && organicAtoms.contains(elementType + tmpString.substring(0,1))) { - elementType += tmpString.substring(0,1); - tmpString = tmpString.substring(1); + /** + * An organic atom e.g. 'C', 'Cl', 'c' etc. + * @param ch + * @throws StructureBuildingException + */ + private void processOrganicAtom(char ch) throws StructureBuildingException { + String elementType = String.valueOf(ch); + boolean spareValency = false; + if(is_A_to_Z(ch)) {//normal atoms + if(i + 1 < endOfSmiles && is_a_to_z(smiles.charAt(i + 1)) && organicAtoms.contains(smiles.substring(i, i + 2))) { + elementType = smiles.substring(i, i + 2); + i++; + } + else if (!organicAtoms.contains(elementType)){ + throw new StructureBuildingException(elementType + " is not an organic Element. If it is actually an element it should be in square brackets"); + } + } + else if(is_a_to_z(ch)) {//aromatic atoms + if (!aromaticAtoms.contains(elementType)){ + throw new StructureBuildingException(elementType + " is not an aromatic Element. 
If it is actually an element it should not be in lower case"); + } + elementType = String.valueOf((char)(ch - 32)); + spareValency = true; + } + else if (ch == '*') { + elementType = "R"; + } + Atom atom = createAtom(elementType, fragment); + atom.setSpareValency(spareValency); + fragment.addAtom(atom); + + StackFrame currentFrame = stack.getLast(); + if(currentFrame.atom != null) { + Bond b = createBond(currentFrame.atom, atom, currentFrame.bondOrder); + if (currentFrame.slash != null){ + b.setSmilesStereochemistry(currentFrame.slash); + currentFrame.slash = null; + } + if (currentFrame.atom.getAtomParity() != null){ + addAtomToAtomParity(currentFrame.atom.getAtomParity(), atom); + } + } + currentFrame.atom = atom; + currentFrame.bondOrder = 1; + } + + /** + * square brackets- contain non-organic atoms or where required to set properties such as charge/chirality etc. + * e.g. [Na+] + * @throws StructureBuildingException + */ + private void processBracketedAtom() throws StructureBuildingException { + i++; + int indexOfRightSquareBracket = smiles.indexOf(']', i); + if (indexOfRightSquareBracket == -1) { + throw new StructureBuildingException("[ without matching \"]\""); + } + // isotope + String isotope = ""; + while(is_0_to_9(smiles.charAt(i))) { + isotope += smiles.charAt(i); + i++; + } + + char ch; + if (i < indexOfRightSquareBracket){ + ch = smiles.charAt(i); + i++; + } + else{ + throw new StructureBuildingException("No element found in square brackets"); + } + // elementType + String elementType = String.valueOf(ch); + boolean spareValency = false; + if(is_A_to_Z(ch)) {//normal atoms + if(is_a_to_z(smiles.charAt(i))) { + elementType += smiles.charAt(i); + i++; + } + } + else if(is_a_to_z(ch)) {//aromatic atoms + if(is_a_to_z(smiles.charAt(i))) { + if (aromaticAtoms.contains(elementType + smiles.charAt(i))){ + elementType = String.valueOf((char)(ch - 32)) + smiles.charAt(i); + i++; } - else if (!organicAtoms.contains(elementType)){ - throw new 
StructureBuildingException(elementType +" is not an organic Element. If it is actually an element it should be in square brackets"); + else{ + throw new StructureBuildingException(elementType + smiles.charAt(i) + " is not an aromatic Element. If it is actually an element it should not be in lower case"); } - } - else if(Character.isLowerCase(nextChar)) {//aromatic atoms + } + else{ if (!aromaticAtoms.contains(elementType)){ - throw new StructureBuildingException(elementType +" is not an aromatic Element. If it is actually an element it should not be in lower case"); + throw new StructureBuildingException(elementType + " is not an aromatic Element."); } - elementType = elementType.toUpperCase(); - spareValency =true; - } - Atom atom = fragManager.createAtom(elementType, currentFrag); - atom.setSpareValency(spareValency); - if(labelMapping.equals(NUMERIC_LABELS_VAL)) { - atom.addLocant(Integer.toString(currentNumber)); - } else if (labelMap !=null){ - String labels[] = MATCH_COMMA.split(labelMap[currentNumber-1]); - for (String label : labels) { - if (!label.equals("")) { - atom.addLocant(label); - } - } - } - currentFrag.addAtom(atom); - if(stack.peek().atom !=null) { - Bond b = fragManager.createBond(stack.peek().atom, atom, stack.peek().bondOrder); - if (stack.peek().slash!=null){ - b.setSmilesStereochemistry(stack.peek().slash); - stack.peek().slash = null; - } - if (stack.peek().atom.getAtomParity()!=null){ - addAtomToAtomParity(stack.peek().atom.getAtomParity(), atom); - } - } - stack.peek().atom = atom; - stack.peek().bondOrder = 1; - currentNumber++; - } else if(nextChar == '[') {//square brackets- contain non-organic atoms and are used to unambiguously set charge/chirality etc. 
- int indexOfRightSquareBracket = tmpString.indexOf(']'); - if (indexOfRightSquareBracket == -1) { - throw new StructureBuildingException("[ without matching \"]\""); - } - String atomString = tmpString.substring(0, indexOfRightSquareBracket);//the contents of the square bracket - tmpString = tmpString.substring(indexOfRightSquareBracket +1); - // isotope - String isotope = ""; - while(atomString.length() > 0 && Character.isDigit(atomString.charAt(0))) { - isotope += atomString.charAt(0); - atomString = atomString.substring(1); - } - - if (atomString.length() > 0){ - nextChar = atomString.charAt(0); - atomString = atomString.substring(1); - } - else{ - throw new StructureBuildingException("No element found in square brackets"); - } - // elementType - String elementType = String.valueOf(nextChar); - boolean spareValency = false; - if(Character.isUpperCase(nextChar)) {//normal atoms - if(atomString.length() > 0 && Character.isLowerCase(atomString.charAt(0))) { - elementType += atomString.substring(0,1); - atomString = atomString.substring(1); - } - } - else if(Character.isLowerCase(nextChar)) {//aromatic atoms - if(atomString.length() > 0 && Character.isLowerCase(atomString.charAt(0))) { - if (aromaticAtoms.contains(elementType + atomString.substring(0,1))){ - elementType = elementType.toUpperCase() + atomString.substring(0,1); - atomString = atomString.substring(1); + elementType = String.valueOf((char)(ch - 32)); + } + spareValency = true; + } + else if (elementType.equals("*")){ + elementType = "R"; + } + else{ + throw new StructureBuildingException(elementType + " is not a valid element type!"); + } + Atom atom = createAtom(elementType, fragment); + atom.setSpareValency(spareValency); + if (isotope.length() > 0){ + atom.setIsotope(Integer.parseInt(isotope)); + } + fragment.addAtom(atom); + StackFrame currentFrame = stack.getLast(); + if(currentFrame.atom != null) { + Bond b = createBond(currentFrame.atom, atom, currentFrame.bondOrder); + if (currentFrame.slash != 
null){ + b.setSmilesStereochemistry(currentFrame.slash); + currentFrame.slash = null; + } + if (currentFrame.atom.getAtomParity() != null){ + addAtomToAtomParity(currentFrame.atom.getAtomParity(), atom); + } + } + Atom previousAtom = currentFrame.atom;//needed for setting atomParity elements up + currentFrame.atom = atom; + currentFrame.bondOrder = 1; + + Integer hydrogenCount = 0; + int charge = 0; + Boolean chiralitySet = false; + for (; i < indexOfRightSquareBracket; i++) { + ch = smiles.charAt(i); + if(ch == '@') {// chirality-sets atom parity + if (chiralitySet){ + throw new StructureBuildingException("Atom parity appeared to be specified twice for an atom in a square bracket!"); + } + processTetrahedralStereochemistry(atom, previousAtom); + chiralitySet = true; + } + else if (ch == 'H'){// hydrogenCount + if (hydrogenCount == null || hydrogenCount != 0){ + throw new StructureBuildingException("Hydrogen count appeared to be specified twice for an atom in a square bracket!"); + } + if (smiles.charAt(i + 1) == '?'){ + //extension to allow standard valency (as determined by the group in the periodic table) to dictate hydrogens + i++; + hydrogenCount = null; + } + else{ + String hydrogenCountString =""; + while(is_0_to_9(smiles.charAt(i + 1))) { + hydrogenCountString += smiles.charAt(i + 1); + i++; + } + if (hydrogenCountString.length() == 0){ + hydrogenCount = 1; } else{ - throw new StructureBuildingException(elementType + atomString.substring(0,1) +" is not an aromatic Element. 
If it is actually an element it should not be in lower case"); + hydrogenCount = Integer.parseInt(hydrogenCountString); } - } - else{ - if (!aromaticAtoms.contains(elementType)){ - throw new StructureBuildingException(elementType +" is not an aromatic Element."); + if (atom.hasSpareValency()) { + if ((!elementType.equals("C") && !elementType.equals("Si")) || hydrogenCount >=2){ + fragment.addIndicatedHydrogen(atom); + } } - elementType = elementType.toUpperCase(); } - spareValency =true; - } - else if (elementType.equals("*")){ - elementType = "R"; - } - else{ - throw new StructureBuildingException(elementType +" is not a valid element type!"); - } - Atom atom = fragManager.createAtom(elementType, currentFrag); - atom.setSpareValency(spareValency); - if (!isotope.equals("")){ - atom.setIsotope(Integer.parseInt(isotope)); - } - if(labelMapping.equals(NUMERIC_LABELS_VAL)) { - atom.addLocant(Integer.toString(currentNumber)); - } else if (labelMap !=null){ - String labels[] = MATCH_COMMA.split(labelMap[currentNumber-1]); - for (String label : labels) { - if (!label.equals("")) { - atom.addLocant(label); - } - } - } - currentFrag.addAtom(atom); - if(stack.peek().atom != null) { - Bond b = fragManager.createBond(stack.peek().atom, atom, stack.peek().bondOrder); - if (stack.peek().slash!=null){ - b.setSmilesStereochemistry(stack.peek().slash); - stack.peek().slash = null; - } - if (stack.peek().atom.getAtomParity()!=null){ - addAtomToAtomParity(stack.peek().atom.getAtomParity(), atom); - } - } - Atom previousAtom = stack.peek().atom;//needed for setting atomParity elements up - stack.peek().atom = atom; - stack.peek().bondOrder = 1; - currentNumber++; - - Integer hydrogenCount =0; - int charge = 0; - Boolean chiralitySet = false; - while (atomString.length()>0){ - nextChar = atomString.charAt(0); - atomString = atomString.substring(1); - if(nextChar == '@') {// chirality-sets atom parity - if (chiralitySet){ - throw new StructureBuildingException("Atom parity appeared to 
be specified twice for an atom in a square bracket!"); - } - atomString = processTetrahedralStereochemistry(atomString, atom, previousAtom); - chiralitySet = true; - } - else if (nextChar == 'H'){// hydrogenCount - if (hydrogenCount ==null || hydrogenCount != 0){ - throw new StructureBuildingException("Hydrogen count appeared to be specified twice for an atom in a square bracket!"); - } - if (atomString.length() > 0 && atomString.charAt(0)=='?'){ - atomString = atomString.substring(1); - hydrogenCount=null; - } - else{ - String hydrogenCountString =""; - while(atomString.length() > 0 && Character.isDigit(atomString.charAt(0))) { - hydrogenCountString += atomString.substring(0,1); - atomString = atomString.substring(1); - } - if (hydrogenCountString.equals("")){ - hydrogenCount=1; - } - else{ - hydrogenCount = Integer.parseInt(hydrogenCountString); - } - if (atom.hasSpareValency()) { - if ((!elementType.equals("C") && !elementType.equals("Si")) || hydrogenCount >=2){ - currentFrag.addIndicatedHydrogen(atom); - } - } - } - } - else if(nextChar == '+' || nextChar == '-') {// formalCharge - if (charge != 0){ - throw new StructureBuildingException("Charge appeared to be specified twice for an atom in a square bracket!"); - } - charge = nextChar == '+' ? 1 : -1; - String changeChargeStr = ""; - int changeCharge = 1; - while(atomString.length() > 0 && Character.isDigit(atomString.charAt(0))) {//e.g. [C+2] - changeChargeStr+= atomString.substring(0,1); - atomString = atomString.substring(1); - } - if (changeChargeStr.equals("")){ - while(atomString.length() > 0){//e.g. [C++] - nextChar = atomString.charAt(0); - if (nextChar == '+'){ - if (charge != 1){ - throw new StructureBuildingException("Atom has both positive and negative charges specified!");//e.g. 
[C+-] - } - } - else if (nextChar == '-'){ - if (charge != -1){ - throw new StructureBuildingException("Atom has both negative and positive charges specified!"); - } - } - else{ - break; - } - changeCharge++; - atomString = atomString.substring(1); - } - } - changeCharge = changeChargeStr.equals("") ? changeCharge : Integer.parseInt(changeChargeStr); - atom.setCharge(atom.getCharge() + (charge * changeCharge) ); - } - else if(nextChar == '|') { - String lambda = ""; - while(atomString.length() > 0 && Character.isDigit(atomString.charAt(0))) { - lambda += atomString.substring(0,1); - atomString = atomString.substring(1); + } + else if(ch == '+' || ch == '-') {// formalCharge + if (charge != 0){ + throw new StructureBuildingException("Charge appeared to be specified twice for an atom in a square bracket!"); + } + charge = (ch == '+') ? 1 : -1; + String changeChargeStr = ""; + int changeCharge = 1; + while(is_0_to_9(smiles.charAt(i + 1))) {//e.g. [C+2] + changeChargeStr += smiles.charAt(i + 1); + i++; + } + if (changeChargeStr.length() == 0){ + while(i + 1 < indexOfRightSquareBracket){//e.g. [C++] + ch = smiles.charAt(i + 1); + if (ch == '+'){ + if (charge != 1){ + throw new StructureBuildingException("Atom has both positive and negative charges specified!");//e.g. [C+-] + } + } + else if (ch == '-'){ + if (charge != -1){ + throw new StructureBuildingException("Atom has both negative and positive charges specified!"); + } + } + else{ + break; + } + changeCharge++; + i++; } - atom.setLambdaConventionValency(Integer.parseInt(lambda)); } - else{ - throw new StructureBuildingException("Unexpected character found in square bracket"); - } - } - atom.setProperty(Atom.SMILES_HYDROGEN_COUNT, hydrogenCount); - } else if(Character.isDigit(nextChar)|| nextChar == '%') { - tmpString = processRingOpeningOrClosure(fragManager, stack, closures, tmpString, nextChar); + changeCharge = changeChargeStr.length() == 0 ? 
changeCharge : Integer.parseInt(changeChargeStr); + atom.setCharge(charge * changeCharge); + } + else if(ch == '|') { + StringBuilder lambda = new StringBuilder(); + while(i < endOfSmiles && is_0_to_9(smiles.charAt(i + 1))) { + lambda.append(smiles.charAt(i + 1)); + i++; + } + atom.setLambdaConventionValency(Integer.parseInt(lambda.toString())); + } + else{ + throw new StructureBuildingException("Unexpected character found in square bracket"); + } + } + atom.setProperty(Atom.SMILES_HYDROGEN_COUNT, hydrogenCount); + } + + /** + * Adds an atomParity element to the given atom using the information at the current index + * @param atom + * @param previousAtom + */ + private void processTetrahedralStereochemistry(Atom atom, Atom previousAtom){ + Boolean chiralityClockwise = false; + if (smiles.charAt(i + 1) == '@'){ + chiralityClockwise = true; + i++; + } + AtomParity atomParity; + if (chiralityClockwise){ + atomParity = new AtomParity(new Atom[4], 1); } else{ - throw new StructureBuildingException(nextChar + " is in an unexpected position. Check this is not a mistake and that this feature of SMILES is supported by OPSIN's SMILES parser"); + atomParity = new AtomParity(new Atom[4], -1); + } + Atom[] atomRefs4 = atomParity.getAtomRefs4(); + int index =0; + if (previousAtom != null){ + atomRefs4[index] = previousAtom; + index++; + } + if (smiles.charAt(i + 1) == 'H'){ + atomRefs4[index] = AtomParity.hydrogen; + //this character will also be checked by the hydrogen count check, hence don't increment i + } + atom.setAtomParity(atomParity); + } + + /** + * Process ring openings and closings e.g. 
the two 1s in c1ccccc1 + * @param ch + * @throws StructureBuildingException + */ + private void processRingOpeningOrClosure(char ch) throws StructureBuildingException { + String closure = String.valueOf(ch); + if(ch == '%') { + if (i + 2 < endOfSmiles && is_0_to_9(smiles.charAt(i + 1)) && is_0_to_9(smiles.charAt(i + 2))) { + closure = smiles.substring(i + 1, i + 3); + i +=2; + } + else{ + throw new StructureBuildingException("A ring opening indice after a % must be two digits long"); + } + } + if(ringClosures.containsKey(closure)) { + processRingClosure(closure); + } else { + if (getInscopeAtom() == null){ + throw new StructureBuildingException("A ring opening has appeared before any atom!"); + } + processRingOpening(closure); } - } - if (labelMap != null && labelMap.length >= currentNumber ){ - throw new StructureBuildingException("Group numbering has been invalidly defined in resource file: labels: " +labelMap.length + ", atoms: " + (currentNumber -1) ); - } - if (!closures.isEmpty()){ - throw new StructureBuildingException("Unmatched ring opening"); } - if(labelMapping.equals(FUSEDRING_LABELS_VAL)) {//fragment is a fusedring with atoms in the correct order for fused ring numbering - //this will do stuff like changing labels from 1,2,3,4,5,6,7,8,9,10->1,2,3,4,4a,5,6,7,8,8a - FragmentTools.relabelFusedRingSystem(currentFrag); + private void processRingOpening(String closure) throws StructureBuildingException { + StackFrame currentFrame = stack.getLast(); + StackFrame sf = new StackFrame(currentFrame); + if (currentFrame.slash != null){ + sf.slash = currentFrame.slash; + currentFrame.slash = null; + } + AtomParity atomParity = sf.atom.getAtomParity(); + if (atomParity != null){//replace ringclosureX with actual reference to id when it is known + sf.indexOfDummyAtom = addAtomToAtomParity(atomParity, ringOpeningDummyAtom); + } + ringClosures.put(closure, sf); + currentFrame.bondOrder = 1; } - List atomList =currentFrag.getAtomList(); - 
verifyAndTakeIntoAccountLonePairsInAtomParities(atomList); - addBondStereoElements(currentFrag); - if(lastCharacter == '-' || lastCharacter == '=' || lastCharacter == '#') { - Atom lastAtom = stack.peek().atom;//note that in something like C(=O)- this would be the carbon not the oxygen - if (lastCharacter == '#'){ - currentFrag.addOutAtom(lastAtom, 3, true); - } - else if (lastCharacter == '='){ - currentFrag.addOutAtom(lastAtom, 2, true); + private void processRingClosure(String closure) throws StructureBuildingException { + StackFrame sf = ringClosures.remove(closure); + StackFrame currentFrame = stack.getLast(); + int bondOrder = 1; + if(sf.bondOrder > 1) { + if(currentFrame.bondOrder > 1 && sf.bondOrder != currentFrame.bondOrder){ + throw new StructureBuildingException("ring closure has two different bond orders specified!"); + } + bondOrder = sf.bondOrder; + } else if(currentFrame.bondOrder > 1) { + bondOrder = currentFrame.bondOrder; + } + Bond b; + if (currentFrame.slash != null) { + //stereochemistry specified on ring closure + //special case e.g. 
CC1=C/F.O\1 Bond is done from the O to the the C due to the presence of the \ + b = createBond(currentFrame.atom, sf.atom, bondOrder); + b.setSmilesStereochemistry(currentFrame.slash); + if(sf.slash != null && sf.slash.equals(currentFrame.slash)) {//specified twice check for contradiction + throw new StructureBuildingException("Contradictory double bond stereoconfiguration"); + } + currentFrame.slash = null; } - else{ - currentFrag.addOutAtom(lastAtom, 1, true); + else { + b = createBond(sf.atom, currentFrame.atom, bondOrder); + if (sf.slash != null) { + //stereochemistry specified on ring opening + b.setSmilesStereochemistry(sf.slash); + } } - } - if(firstCharacter == '-'){ - currentFrag.addOutAtom(currentFrag.getFirstAtom(),1, true); - } - else if(firstCharacter == '='){ - currentFrag.addOutAtom(currentFrag.getFirstAtom(),2, true); - } - else if (firstCharacter == '#'){ - currentFrag.addOutAtom(currentFrag.getFirstAtom(),3, true); + AtomParity currentAtomParity = currentFrame.atom.getAtomParity(); + if (currentAtomParity != null) { + addAtomToAtomParity(currentAtomParity, sf.atom); + } + + AtomParity closureAtomParity = sf.atom.getAtomParity(); + if (closureAtomParity != null) {//replace dummy atom with actual atom e.g. 
N[C@@H]1C.F1 where the 1 initially holds a dummy atom before being replaced with the F atom + Atom[] atomRefs4 = closureAtomParity.getAtomRefs4(); + if (sf.indexOfDummyAtom == null) { + throw new RuntimeException("OPSIN Bug: Index of dummy atom representing ring closure atom not set"); + } + atomRefs4[sf.indexOfDummyAtom] = currentFrame.atom; + } + currentFrame.bondOrder = 1; } - for (Atom atom : atomList) { - if (atom.getProperty(Atom.SMILES_HYDROGEN_COUNT)!=null && atom.getLambdaConventionValency() ==null){ - setupAtomValency(fragManager, atom); + /** + * Adds an atom at the first non-null position in the atomParity's atomRefs4 + * @param atomParity + * @param atom + * @return Returns the index of the atom in the atomParity's atomRefs4 + * @throws StructureBuildingException + */ + private int addAtomToAtomParity(AtomParity atomParity, Atom atom) throws StructureBuildingException { + Atom[] atomRefs4 = atomParity.getAtomRefs4(); + boolean setAtom = false; + int i = 0; + for (; i < atomRefs4.length; i++) { + if (atomRefs4[i] == null){ + atomRefs4[i] = atom; + setAtom = true; + break; + } + } + if (!setAtom){ + throw new StructureBuildingException("Tetrahedral stereocentre specified in SMILES appears to involve more than 4 atoms"); } + return i; + } + + /** + * For non-empty SMILES will return the atom at the top of the stack i.e. the one that will be bonded to next if the SMILES continued + * (only valid during execution of and after {@link ParserInstance#parseSmiles()} has been called) + * @return + */ + Atom getInscopeAtom(){ + return stack.getLast().atom; } - CycleDetector.assignWhetherAtomsAreInCycles(currentFrag); - return currentFrag; } - + /** - * Adds an atomParity element to the given atom using the descriptor in atomString - * @param atomString - * @param atom - * @param previousAtom - * @return + * Build a Fragment based on a SMILES string. 
+ * The type/subType of the Fragment are the empty String + * The fragment has no locants + * + * @param smiles The SMILES string to build from. + * @return The built fragment. + * @throws StructureBuildingException */ - private String processTetrahedralStereochemistry(String atomString, Atom atom, Atom previousAtom){ - Boolean chiralityClockwise = false; - if (atomString.length() > 0 && atomString.charAt(0) == '@'){ - chiralityClockwise = true; - atomString = atomString.substring(1); - } - AtomParity atomParity; - if (chiralityClockwise){ - atomParity = new AtomParity(new Atom[4], 1); - } - else{ - atomParity = new AtomParity(new Atom[4], -1); - } - Atom[] atomRefs4 = atomParity.getAtomRefs4(); - int indice =0; - if (previousAtom !=null){ - atomRefs4[indice] = previousAtom; - indice++; - } - if (atomString.length() > 0 && atomString.charAt(0) == 'H'){ - atomRefs4[indice] = AtomParity.hydrogen; - } - atom.setAtomParity(atomParity); - return atomString; + Fragment build(String smiles) throws StructureBuildingException { + return build(smiles, "", NONE_LABELS_VAL); } - + /** - * Process ring openings and closings e.g. the two 1s in c1ccccc1 - * @param fragManager - * @param stack - * @param closures - * @param tmpString - * @param nextChar + * Build a Fragment based on a SMILES string. + * @param smiles The SMILES string to build from. + * @param type The type of the fragment retrieved when calling {@link Fragment#getType()} + * @param labelMapping A string indicating which locants to assign to each atom. 
Can be a slash delimited list, "numeric", "fusedRing" or "none"/"" * @return * @throws StructureBuildingException */ - private String processRingOpeningOrClosure(FragmentManager fragManager, - Stack stack, HashMap closures, - String tmpString, Character nextChar) - throws StructureBuildingException { - String closure = String.valueOf(nextChar); - if(nextChar == '%') { - if (tmpString.length() >=2 && Character.isDigit(tmpString.charAt(0)) && Character.isDigit(tmpString.charAt(1))) { - closure = tmpString.substring(0,2); - tmpString = tmpString.substring(2); - } - else{ - throw new StructureBuildingException("A ring opening indice after a % must be two digits long"); - } - } - if(closures.containsKey(closure)) { - processRingClosure(fragManager, stack, closures, closure); - } else { - if (stack.peek().atom==null){ - throw new StructureBuildingException("A ring opening has appeared before any atom!"); - } - processRingOpening(stack, closures, closure); - } - return tmpString; + Fragment build(String smiles, String type, String labelMapping) throws StructureBuildingException { + return build(smiles, new Fragment(type), labelMapping); } - private void processRingOpening(Stack stack, - HashMap closures, String closure) throws StructureBuildingException { - StackFrame sf = new StackFrame(stack.peek()); - if (stack.peek().slash!=null){ - sf.slash = stack.peek().slash; - stack.peek().slash = null; - } - if (sf.atom.getAtomParity()!=null){//replace ringclosureX with actual reference to id when it is known - Atom dummyRingClosureAtom = new Atom(closure); - addAtomToAtomParity(sf.atom.getAtomParity(), dummyRingClosureAtom); + /** + * Build a Fragment based on a SMILES string. + * @param smiles The SMILES string to build from. + * @param tokenEl The corresponding tokenEl + * @param labelMapping A string indicating which locants to assign to each atom. Can be a slash delimited list, "numeric", "fusedRing" or "none"/"" + * @return Fragment The built fragment. 
+ * @throws StructureBuildingException + */ + Fragment build(String smiles, Element tokenEl, String labelMapping) throws StructureBuildingException { + if (tokenEl == null){ + throw new IllegalArgumentException("tokenEl is null. FragmentManager's DUMMY_TOKEN should be used instead"); } - closures.put(closure, sf); - stack.peek().bondOrder = 1; + return build(smiles, new Fragment(tokenEl), labelMapping); } - - private void addAtomToAtomParity(AtomParity atomParity, Atom atom) throws StructureBuildingException { - Atom[] atomRefs4 = atomParity.getAtomRefs4(); - boolean setAtom =false; - for (int i = 0; i < atomRefs4.length; i++) { - if (atomRefs4[i] ==null){ - atomRefs4[i] = atom; - setAtom =true; - break; - } + + private Fragment build(String smiles, Fragment fragment, String labelMapping) throws StructureBuildingException { + if (smiles == null){ + throw new IllegalArgumentException("SMILES specified is null"); + } + if (labelMapping == null){ + throw new IllegalArgumentException("labelMapping is null use \"none\" if you do not want any numbering or \"numeric\" if you would like default numbering"); + } + if (smiles.length() == 0){ + return fragment; + } + int firstIndex = 0; + int lastIndex = smiles.length(); + char firstCharacter =smiles.charAt(0); + if(firstCharacter == '-' || firstCharacter == '=' || firstCharacter == '#') {//used by OPSIN to specify the valency with which this fragment connects + firstIndex++; } - if (!setAtom){ - throw new StructureBuildingException("Tetrahedral stereocentre specified in SMILES appears to involve more than 4 atoms"); + char lastCharacter =smiles.charAt(lastIndex - 1); + if(lastCharacter == '-' || lastCharacter == '=' || lastCharacter == '#') {//used by OPSIN to specify the valency with which this fragment connects and to indicate it connects via the last atom in the SMILES + lastIndex--; } - } + ParserInstance instance = new ParserInstance(smiles.substring(firstIndex, lastIndex), fragment); + instance.parseSmiles(); + + List 
atomList = fragment.getAtomList(); + processLabelling(labelMapping, atomList); - private void processRingClosure(FragmentManager fragManager, - Stack stack, HashMap closures, - String closure) throws StructureBuildingException { - StackFrame sf = closures.remove(closure); - int bondOrder = 1; - if(sf.bondOrder > 1) { - if(stack.peek().bondOrder > 1 && sf.bondOrder != stack.peek().bondOrder){ - throw new StructureBuildingException("ring closure has two different bond orders specified!"); - } - bondOrder = sf.bondOrder; - } else if(stack.peek().bondOrder > 1) { - bondOrder = stack.peek().bondOrder; + verifyAndTakeIntoAccountLonePairsInAtomParities(atomList); + addBondStereoElements(fragment); + + if(firstCharacter == '-'){ + fragment.addOutAtom(fragment.getFirstAtom(), 1, true); } - Bond b; - if (stack.peek().slash ==null){ - b = fragManager.createBond(sf.atom, stack.peek().atom, bondOrder); + else if(firstCharacter == '='){ + fragment.addOutAtom(fragment.getFirstAtom(), 2, true); } - else{ - b = fragManager.createBond(stack.peek().atom, sf.atom, bondOrder);//special case e.g. 
CC1=C/F.O\1 Bond is done from the O to the the C due to the presence of the \ + else if (firstCharacter == '#'){ + fragment.addOutAtom(fragment.getFirstAtom(), 3, true); } - if(sf.slash !=null) { - if(stack.peek().slash !=null) { - if (sf.slash.equals(stack.peek().slash)){ - throw new StructureBuildingException("Contradictory double bond stereoconfiguration"); - } + + if(lastCharacter == '-' || lastCharacter == '=' || lastCharacter == '#') { + Atom lastAtom = instance.getInscopeAtom();//note that in something like C(=O)- this would be the carbon not the oxygen + if (lastCharacter == '#'){ + fragment.addOutAtom(lastAtom, 3, true); + } + else if (lastCharacter == '='){ + fragment.addOutAtom(lastAtom, 2, true); } else{ - b.setSmilesStereochemistry(sf.slash); + fragment.addOutAtom(lastAtom, 1, true); } - } else if(stack.peek().slash !=null) { - b.setSmilesStereochemistry(stack.peek().slash); - stack.peek().slash = null; } - if (stack.peek().atom.getAtomParity()!=null){ - AtomParity atomParity = stack.peek().atom.getAtomParity(); - addAtomToAtomParity(atomParity, sf.atom); + + for (Atom atom : atomList) { + if (atom.getProperty(Atom.SMILES_HYDROGEN_COUNT) != null && atom.getLambdaConventionValency() == null){ + setupAtomValency(atom); + } } - if (sf.atom.getAtomParity()!=null){//replace dummy atom with actual atom e.g. 
N[C@@H]1C.F1 where the 1 initially holds a dummy atom before being replaced with the F atom - AtomParity atomParity = sf.atom.getAtomParity(); - Atom[] atomRefs4 = atomParity.getAtomRefs4(); - boolean replacedAtom =false; - for (int i = 0; i < atomRefs4.length; i++) { - if (atomRefs4[i] !=null && atomRefs4[i].getElement().equals(closure)){ - atomRefs4[i] = stack.peek().atom; - replacedAtom =true; - break; - } + CycleDetector.assignWhetherAtomsAreInCycles(fragment); + return fragment; + } + + private void processLabelling(String labelMapping, List atomList) throws StructureBuildingException { + if (labelMapping.equals(NONE_LABELS_VAL) || labelMapping.length() == 0) { + return; + } + if (labelMapping.equals(NUMERIC_LABELS_VAL)) { + int atomNumber = 1; + for (Atom atom : atomList) { + atom.addLocant(Integer.toString(atomNumber++)); } - if (!replacedAtom){ - throw new StructureBuildingException("Unable to find ring closure atom in atomRefs4 of atomparity when building SMILES"); + } + else if(labelMapping.equals(FUSEDRING_LABELS_VAL)) {//fragment is a fusedring with atoms in the correct order for fused ring numbering + //this will do stuff like changing labels from 1,2,3,4,5,6,7,8,9,10->1,2,3,4,4a,5,6,7,8,8a + FragmentTools.relabelLocantsAsFusedRingSystem(atomList); + } + else{ + String[] labelMap = labelMapping.split("/", -1);//place slash delimited labels into an array + int numOfAtoms = atomList.size(); + if (labelMap.length != numOfAtoms){ + throw new StructureBuildingException("Group numbering has been invalidly defined in resource file: labels: " +labelMap.length + ", atoms: " + numOfAtoms ); + } + for (int i = 0; i < numOfAtoms; i++) { + String labels[] = labelMap[i].split(","); + for (String label : labels) { + if (label.length() > 0) { + atomList.get(i).addLocant(label); + } + } } } - stack.peek().bondOrder = 1; } private void verifyAndTakeIntoAccountLonePairsInAtomParities(List atomList) throws StructureBuildingException { for (Atom atom : atomList) { - 
AtomParity atomParity =atom.getAtomParity(); - if (atomParity!=null){ + AtomParity atomParity = atom.getAtomParity(); + if (atomParity != null){ Atom[] atomRefs4 = atomParity.getAtomRefs4(); - int nullAtoms =0; - int hydrogen =0; + int nullAtoms = 0; + int hydrogen = 0; for (Atom atomRefs4Atom : atomRefs4) { - if (atomRefs4Atom ==null){ + if (atomRefs4Atom == null){ nullAtoms++; } else if (atomRefs4Atom.equals(AtomParity.hydrogen)){ hydrogen++; } } - if (nullAtoms!=0){ - if (nullAtoms ==1 && hydrogen==0 && (atom.getElement().equals("S") || atom.getElement().equals("Se"))){//special case where lone pair is part of the tetrahedron + if (nullAtoms != 0){ + if (nullAtoms ==1 && hydrogen==0 && + (atom.getElement() == ChemEl.N || atom.getElement() == ChemEl.S || atom.getElement() == ChemEl.Se)){//special case where lone pair is part of the tetrahedron if (atomList.indexOf(atomRefs4[0]) < atomList.indexOf(atom)){//is there an atom in the SMILES in front of the stereocentre? atomRefs4[3] = atomRefs4[2]; atomRefs4[2] = atomRefs4[1]; @@ -654,56 +800,43 @@ private void addBondStereoElements(Fragment currentFrag) throws StructureBuildingException { Set bonds = currentFrag.getBondSet(); for (Bond centralBond : bonds) {//identify cases of E/Z stereochemistry and add appropriate bondstereo tags - if (centralBond.getOrder()==2){ - - List fromAtomBonds =centralBond.getFromAtom().getBonds(); + if (centralBond.getOrder() == 2) { + List fromAtomBonds = centralBond.getFromAtom().getBonds(); for (Bond preceedingBond : fromAtomBonds) { - if (preceedingBond.getSmilesStereochemistry()!=null){ + if (preceedingBond.getSmilesStereochemistry() != null) { List toAtomBonds = centralBond.getToAtom().getBonds(); for (Bond followingBond : toAtomBonds) { - if (followingBond.getSmilesStereochemistry()!=null){//now found a double bond surrounded by two bonds with slashs + if (followingBond.getSmilesStereochemistry() != null) {//now found a double bond surrounded by two bonds with slashs boolean 
upFirst; boolean upSecond; Atom atom2 = centralBond.getFromAtom(); - Atom atom1; - if (atom2 == preceedingBond.getToAtom()){ - atom1 = preceedingBond.getFromAtom(); - } - else{ - atom1 = preceedingBond.getToAtom(); - } Atom atom3 = centralBond.getToAtom(); - Atom atom4; - if (atom3 == followingBond.getFromAtom()){ - atom4 = followingBond.getToAtom(); - } - else{ - atom4 = followingBond.getFromAtom(); - } - if (preceedingBond.getSmilesStereochemistry() == SMILES_BOND_DIRECTION.LSLASH){ + Atom atom1 = preceedingBond.getOtherAtom(atom2); + Atom atom4 = followingBond.getOtherAtom(atom3); + if (preceedingBond.getSmilesStereochemistry() == SMILES_BOND_DIRECTION.LSLASH) { upFirst = preceedingBond.getToAtom() == atom2;//in normally constructed SMILES this will be the case but you could write C(/F)=C/F instead of F\C=C/F } - else if (preceedingBond.getSmilesStereochemistry() == SMILES_BOND_DIRECTION.RSLASH){ + else if (preceedingBond.getSmilesStereochemistry() == SMILES_BOND_DIRECTION.RSLASH) { upFirst = preceedingBond.getToAtom() != atom2; } else{ throw new StructureBuildingException(preceedingBond.getSmilesStereochemistry() + " is not a slash!"); } - if (followingBond.getSmilesStereochemistry() == SMILES_BOND_DIRECTION.LSLASH){ + if (followingBond.getSmilesStereochemistry() == SMILES_BOND_DIRECTION.LSLASH) { upSecond = followingBond.getFromAtom() != atom3; } - else if (followingBond.getSmilesStereochemistry() == SMILES_BOND_DIRECTION.RSLASH){ + else if (followingBond.getSmilesStereochemistry() == SMILES_BOND_DIRECTION.RSLASH) { upSecond = followingBond.getFromAtom() == atom3; } else{ throw new StructureBuildingException(followingBond.getSmilesStereochemistry() + " is not a slash!"); } BondStereoValue cisTrans = upFirst == upSecond ? BondStereoValue.CIS : BondStereoValue.TRANS; - if (centralBond.getBondStereo()!=null){ + if (centralBond.getBondStereo() != null) { //double bond has redundant specification e.g. 
C/C=C\\1/NC1 hence need to check it is consistent Atom[] atomRefs4 = centralBond.getBondStereo().getAtomRefs4(); - if (atomRefs4[0].equals(atom1) || atomRefs4[3].equals(atom4)){ + if (atomRefs4[0].equals(atom1) || atomRefs4[3].equals(atom4)) { if (centralBond.getBondStereo().getBondStereoValue().equals(cisTrans)){ throw new StructureBuildingException("Contradictory double bond stereoconfiguration"); } @@ -716,10 +849,10 @@ } else{ Atom[] atomRefs4= new Atom[4]; - atomRefs4[0] =atom1; - atomRefs4[1] =atom2; - atomRefs4[2] =atom3; - atomRefs4[3] =atom4; + atomRefs4[0] = atom1; + atomRefs4[1] = atom2; + atomRefs4[2] = atom3; + atomRefs4[3] = atom4; centralBond.setBondStereoElement(atomRefs4, cisTrans); } } @@ -736,39 +869,41 @@ /** * Utilises the atom's hydrogen count as set by the SMILES as well as incoming valency to determine the atom's valency * If the atom is charged whether protons have been added or removed will also need to be determined - * @param fragManager * @param atom * @throws StructureBuildingException */ - private void setupAtomValency(FragmentManager fragManager, Atom atom) throws StructureBuildingException { + private void setupAtomValency(Atom atom) throws StructureBuildingException { int hydrogenCount = atom.getProperty(Atom.SMILES_HYDROGEN_COUNT); int incomingValency = atom.getIncomingValency() + hydrogenCount +atom.getOutValency(); int charge = atom.getCharge(); int absoluteCharge =Math.abs(charge); - String element =atom.getElement(); - if (atom.hasSpareValency()){ - Integer hwValency; - if (element.equals("C")){ - hwValency =4; - } - else{ - hwValency = ValencyChecker.getHWValency(element); - if (hwValency == null){ - throw new StructureBuildingException(element +" is not expected to be aromatic!"); + ChemEl chemEl = atom.getElement(); + if (atom.hasSpareValency()) { + Integer hwValency = ValencyChecker.getHWValency(chemEl); + if (hwValency == null || absoluteCharge > 1) { + throw new StructureBuildingException(chemEl +" is not expected to be 
aromatic!"); + } + if (absoluteCharge != 0) { + Integer[] possibleVal = ValencyChecker.getPossibleValencies(chemEl, charge); + if (possibleVal != null && possibleVal.length > 0) { + hwValency = possibleVal[0]; + } + else { + throw new StructureBuildingException(chemEl +" with charge " + charge + " is not expected to be aromatic!"); } } - if (incomingValency < (hwValency + absoluteCharge)){ + if (incomingValency < hwValency){ incomingValency++; } } - Integer defaultVal = ValencyChecker.getDefaultValency(element); + Integer defaultVal = ValencyChecker.getDefaultValency(chemEl); if (defaultVal !=null){//s or p block element - if (defaultVal != incomingValency || charge !=0){ - if (Math.abs(incomingValency - defaultVal)==Math.abs(charge)){ + if (defaultVal != incomingValency || charge !=0) { + if (Math.abs(incomingValency - defaultVal) == absoluteCharge) { atom.setProtonsExplicitlyAddedOrRemoved(incomingValency - defaultVal); } else{ - Integer[] unchargedStableValencies = ValencyChecker.getPossibleValencies(element, 0); + Integer[] unchargedStableValencies = ValencyChecker.getPossibleValencies(chemEl, 0); boolean hasPlausibleValency =false; for (Integer unchargedStableValency : unchargedStableValencies) { if (Math.abs(incomingValency - unchargedStableValency)==Math.abs(charge)){ @@ -791,14 +926,65 @@ } } else{ - if (hydrogenCount >0){//make hydrogen explicit + if (hydrogenCount > 0){//make hydrogen explicit Fragment frag =atom.getFrag(); for (int i = 0; i < hydrogenCount; i++) { - Atom hydrogen = fragManager.createAtom("H", frag); - fragManager.createBond(atom, hydrogen, 1); + Atom hydrogen = createAtom(ChemEl.H, frag); + createBond(atom, hydrogen, 1); } } } } + + + /** + * Create a new Atom of the given element belonging to the given fragment + * @param elementSymbol + * @param frag + * @return Atom + */ + private Atom createAtom(String elementSymbol, Fragment frag) { + return createAtom(ChemEl.valueOf(elementSymbol), frag); + } + + /** + * Create a new Atom of the 
given element belonging to the given fragment + * @param chemEl + * @param frag + * @return Atom + */ + private Atom createAtom(ChemEl chemEl, Fragment frag) { + Atom a = new Atom(idManager.getNextID(), chemEl, frag); + frag.addAtom(a); + return a; + } + + /** + * Create a new bond between two atoms. + * The bond is associated with these atoms. + * @param fromAtom + * @param toAtom + * @param bondOrder + * @return Bond + */ + private Bond createBond(Atom fromAtom, Atom toAtom, int bondOrder) { + Bond b = new Bond(fromAtom, toAtom, bondOrder); + fromAtom.addBond(b); + toAtom.addBond(b); + fromAtom.getFrag().addBond(b); + return b; + } + + private boolean is_A_to_Z(char ch) { + return ch >= 'A' && ch <= 'Z'; + } + + private boolean is_a_to_z(char ch) { + return ch >= 'a' && ch <= 'z'; + } + + private boolean is_0_to_9(char ch){ + return ch >= '0' && ch <= '9'; + } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SMILESWriter.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SMILESWriter.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SMILESWriter.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SMILESWriter.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,11 +1,15 @@ package uk.ac.cam.ch.wwmm.opsin; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Arrays; +import java.util.Deque; +import java.util.EnumMap; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashMap; -import java.util.LinkedList; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; @@ -20,40 +24,46 @@ class SMILESWriter { /**The organic atoms and their allowed implicit valences in SMILES */ - private static final Map organicAtomsToStandardValencies = new HashMap(); + private static final Map organicAtomsToStandardValencies = new EnumMap(ChemEl.class); /**Closures 1-9, %10-99, 0 */ - private static 
final LinkedList closureSymbols = new LinkedList(); + private static final List closureSymbols = new ArrayList(); /**The available ring closure symbols, ordered from start to end in the preferred order for use.*/ - private final LinkedList availableClosureSymbols = new LinkedList(closureSymbols); + private final Deque availableClosureSymbols = new ArrayDeque(closureSymbols); /**Maps between bonds and the ring closure to use when the atom that ends the bond is encountered.*/ private final HashMap bondToClosureSymbolMap = new HashMap(); /**Maps between bonds and the atom that this bond will go to in the SMILES. Populated in the order the bonds are to be made */ - private final HashMap bondToNextAtomMap= new LinkedHashMap(); + private final HashMap bondToNextAtomMap = new LinkedHashMap(); /**The structure to be converted to SMILES*/ private final Fragment structure; /**Holds the SMILES string which is under construction*/ private final StringBuilder smilesBuilder = new StringBuilder(); + + /**Should extended SMILES be output*/ + private final boolean outputExtendedSmiles; + + /**The order atoms were traversed when creating the SMILES*/ + private List smilesOutputOrder; static { - organicAtomsToStandardValencies.put("B", new Integer[]{3}); - organicAtomsToStandardValencies.put("C", new Integer[]{4}); - organicAtomsToStandardValencies.put("N", new Integer[]{3,5});//note that OPSIN doesn't accept valency 5 nitrogen without the lambda convention - organicAtomsToStandardValencies.put("O", new Integer[]{2}); - organicAtomsToStandardValencies.put("P", new Integer[]{3,5}); - organicAtomsToStandardValencies.put("S", new Integer[]{2,4,6}); - organicAtomsToStandardValencies.put("F", new Integer[]{1}); - organicAtomsToStandardValencies.put("Cl", new Integer[]{1}); - organicAtomsToStandardValencies.put("Br", new Integer[]{1}); - organicAtomsToStandardValencies.put("I", new Integer[]{1}); + organicAtomsToStandardValencies.put(ChemEl.B, new Integer[]{3}); + 
organicAtomsToStandardValencies.put(ChemEl.C, new Integer[]{4}); + organicAtomsToStandardValencies.put(ChemEl.N, new Integer[]{3,5});//note that OPSIN doesn't accept valency 5 nitrogen without the lambda convention + organicAtomsToStandardValencies.put(ChemEl.O, new Integer[]{2}); + organicAtomsToStandardValencies.put(ChemEl.P, new Integer[]{3,5}); + organicAtomsToStandardValencies.put(ChemEl.S, new Integer[]{2,4,6}); + organicAtomsToStandardValencies.put(ChemEl.F, new Integer[]{1}); + organicAtomsToStandardValencies.put(ChemEl.Cl, new Integer[]{1}); + organicAtomsToStandardValencies.put(ChemEl.Br, new Integer[]{1}); + organicAtomsToStandardValencies.put(ChemEl.I, new Integer[]{1}); - organicAtomsToStandardValencies.put("R", new Integer[]{1,2,3,4,5,6,7,8,9}); + organicAtomsToStandardValencies.put(ChemEl.R, new Integer[]{1,2,3,4,5,6,7,8,9}); for (int i = 1; i <=9; i++) { closureSymbols.add(String.valueOf(i)); @@ -67,114 +77,267 @@ /** * Creates a SMILES writer for the given fragment * @param structure + * @param outputExtendedSmiles */ - SMILESWriter(Fragment structure) { - this.structure =structure; + private SMILESWriter(Fragment structure, boolean outputExtendedSmiles) { + this.structure = structure; + this.outputExtendedSmiles = outputExtendedSmiles; } /** - * Generates SMILES from the fragment the SMILESWriter was created with + * Generates SMILES for the given fragment * The following assumptions are currently made: * The fragment contains no bonds to atoms outside the fragment * Hydrogens are all explicit * Spare valency has been converted to double bonds - * @return + * @return SMILES String + */ + static String generateSmiles(Fragment structure) { + return new SMILESWriter(structure, false).writeSmiles(); + } + + /** + * Generates extended SMILES for the given fragment + * The following assumptions are currently made: + * The fragment contains no bonds to atoms outside the fragment + * Hydrogens are all explicit + * Spare valency has been converted to 
double bonds + * @return Extended SMILES String */ - String generateSmiles() { + static String generateExtendedSmiles(Fragment structure) { + return new SMILESWriter(structure, true).writeSmiles(); + } + + String writeSmiles() { assignSmilesOrder(); assignDoubleBondStereochemistrySlashes(); List atomList = structure.getAtomList(); - List nonProtonAtomList = createNonProtonAtomList(atomList); - int nonProtonCount = nonProtonAtomList.size(); - boolean isEmpty =true; - for (int i = 0; i < nonProtonCount; i++) { - Atom currentAtom = nonProtonAtomList.get(i); - if(currentAtom.getProperty(Atom.VISITED)==0){//new component + smilesOutputOrder = new ArrayList(atomList.size()); + + boolean isEmpty = true; + for (Atom currentAtom : atomList) { + Integer visitedDepth = currentAtom.getProperty(Atom.VISITED); + if (visitedDepth != null && visitedDepth ==0) {//new component if (!isEmpty){ smilesBuilder.append('.'); } - traverseSmiles(currentAtom, null, 0); - isEmpty =false; + traverseSmiles(currentAtom); + isEmpty = false; } } + + if (outputExtendedSmiles) { + writeExtendedSmilesLayer(); + } return smilesBuilder.toString(); } + private void writeExtendedSmilesLayer() { + List atomLabels = new ArrayList(); + List positionVariationBonds = new ArrayList(); + Integer lastLabel = null; + int attachmentPointCounter = 1; + Set seenAttachmentpoints = new HashSet(); + List polymerAttachPoints = structure.getPolymerAttachmentPoints(); + boolean isPolymer = polymerAttachPoints != null && polymerAttachPoints.size() > 0; + for (int i = 0, l = smilesOutputOrder.size(); i < l; i++) { + Atom a = smilesOutputOrder.get(i); + String homologyGroup = a.getProperty(Atom.HOMOLOGY_GROUP); + if (homologyGroup != null) { + homologyGroup = escapeExtendedSmilesLabel(homologyGroup); + if (homologyGroup.startsWith("_")) { + atomLabels.add(homologyGroup); + } + else { + atomLabels.add(homologyGroup + "_p"); + } + lastLabel = i; + } + else if (a.getElement() == ChemEl.R){ + if (isPolymer) { + 
atomLabels.add("star_e"); + } + else { + Integer atomClass = a.getProperty(Atom.ATOM_CLASS); + if (atomClass != null) { + seenAttachmentpoints.add(atomClass); + } + else { + do { + atomClass = attachmentPointCounter++; + } + while (seenAttachmentpoints.contains(atomClass)); + } + atomLabels.add("_AP" + String.valueOf(atomClass)); + } + lastLabel = i; + } + else { + atomLabels.add(""); + } + List atomsInPositionVariationBond = a.getProperty(Atom.POSITION_VARIATION_BOND); + if (atomsInPositionVariationBond != null) { + StringBuilder sb = new StringBuilder(); + sb.append(i); + for (int j = 0; j < atomsInPositionVariationBond.size(); j++) { + sb.append(j==0 ? ':' : '.'); + Atom referencedAtom = atomsInPositionVariationBond.get(j); + int referencedAtomIndex = smilesOutputOrder.indexOf(referencedAtom); + if (referencedAtomIndex == -1){ + throw new RuntimeException("OPSIN Bug: Failed to resolve position variation bond atom"); + } + sb.append(referencedAtomIndex); + } + positionVariationBonds.add(sb.toString()); + } + } + List extendedSmiles = new ArrayList(2); + if (lastLabel != null) { + extendedSmiles.add("$" + StringTools.stringListToString(atomLabels.subList(0, lastLabel + 1), ";") + "$" ); + } + if (positionVariationBonds.size() > 0) { + extendedSmiles.add("m:" + StringTools.stringListToString(positionVariationBonds, ",")); + } + if (isPolymer) { + StringBuilder sruContents = new StringBuilder(); + sruContents.append("Sg:n:"); + boolean appendDelimiter = false; + for (int i = 0, l = smilesOutputOrder.size(); i < l; i++) { + if (smilesOutputOrder.get(i).getElement() != ChemEl.R) { + if (appendDelimiter) { + sruContents.append(','); + } + sruContents.append(i); + appendDelimiter = true; + } + } + sruContents.append("::ht"); + extendedSmiles.add(sruContents.toString()); + } + if (extendedSmiles.size() > 0) { + smilesBuilder.append(" |"); + smilesBuilder.append(StringTools.stringListToString(extendedSmiles, ",")); + smilesBuilder.append('|'); + } + } + + private String 
escapeExtendedSmilesLabel(String str) { + StringBuilder sb = new StringBuilder(); + for (int i = 0, len = str.length(); i < len; i++) { + char ch = str.charAt(i); + if ((ch >= 'a' && ch <= 'z') || + (ch >= 'A' && ch <= 'Z') || + (ch >= '0' && ch <= '9') ) { + sb.append(ch); + } + else { + sb.append("&#"); + sb.append(String.valueOf((int)ch)); + sb.append(';'); + } + } + return sb.toString(); + } + /** * Walks through the fragment populating the Atom.VISITED property indicating how many bonds * an atom is from the start of the fragment walk. A new walk will be started for each disconnected component of the fragment */ private void assignSmilesOrder() { - List atomList =structure.getAtomList(); + List atomList = structure.getAtomList(); for (Atom atom : atomList) { atom.setProperty(Atom.VISITED, null); } for (Atom a : atomList) { - if(a.getProperty(Atom.VISITED)==null && !isSmilesImplicitProton(a)){//typically for all but the first atom this will be true - traverseMolecule(a, null, 0); + if(a.getProperty(Atom.VISITED) == null && !isSmilesImplicitProton(a)){//true for only the first atom in a fully connected molecule + traverseMolecule(a); } } } - + + private static class TraversalState { + private final Atom atom; + private final Bond bondTaken; + private final int depth; + + private TraversalState(Atom atom, Bond bondTaken, int depth) { + this.atom = atom; + this.bondTaken = bondTaken; + this.depth = depth; + } + } + /** - * Recursive function for populating the Atom.VISITED property + * Iterative function for populating the Atom.VISITED property * Also populates the bondToNextAtom Map - * @param currentAtom - * @param previousAtom - * @param depth + * @param startingAtom * @return */ - private void traverseMolecule(Atom currentAtom, Atom previousAtom, int depth){ - if(currentAtom.getProperty(Atom.VISITED)!=null){ - return; - } - currentAtom.setProperty(Atom.VISITED, depth); - List bonds = currentAtom.getBonds(); - for (Bond bond : bonds) { - Atom neighbour = 
bond.getOtherAtom(currentAtom); - if (isSmilesImplicitProton(neighbour)){ + private void traverseMolecule(Atom startingAtom){ + Deque stack = new ArrayDeque(); + stack.add(new TraversalState(startingAtom, null, 0)); + while (!stack.isEmpty()){ + TraversalState currentstate = stack.removeLast(); + Atom currentAtom = currentstate.atom; + Bond bondtaken = currentstate.bondTaken; + if (bondtaken != null) { + bondToNextAtomMap.put(bondtaken, currentAtom); + } + if(currentAtom.getProperty(Atom.VISITED) != null){ continue; } - if (neighbour.equals(previousAtom)){ - continue; + int depth = currentstate.depth; + currentAtom.setProperty(Atom.VISITED, depth); + List bonds = currentAtom.getBonds(); + for (int i = bonds.size() - 1; i >=0; i--) { + Bond bond = bonds.get(i); + if (bond.equals(bondtaken)){ + continue; + } + Atom neighbour = bond.getOtherAtom(currentAtom); + if (isSmilesImplicitProton(neighbour)){ + continue; + } + stack.add(new TraversalState(neighbour, bond, depth + 1)); } - bondToNextAtomMap.put(bond, neighbour); - traverseMolecule(neighbour, currentAtom, depth+1); } } private boolean isSmilesImplicitProton(Atom atom) { - if (!atom.getElement().equals("H") || (atom.getIsotope()!=null && atom.getIsotope()!=1) ){ + if (atom.getElement() != ChemEl.H){ + //not hydrogen return false; } - else{ - List neighbours = atom.getAtomNeighbours(); - //special case where hydrogen is bridging - if (neighbours.size() > 1){ - return false; - } - //special case where hydrogen is a counter ion or only connects to other hydrogen and/or R-groups - boolean foundHeavyAtomNeighbour =false; - for (Atom neighbour : neighbours) { - String element = neighbour.getElement(); - if (!element.equals("H") && !element.equals("R")){ - foundHeavyAtomNeighbour =true; - } - } - if (!foundHeavyAtomNeighbour){ - return false; - } - - //special case where hydrogen is connected to a nitrogen with imine double bond stereochemistry - if (neighbours.get(0).getElement().equals("N")){ - List bondsFromNitrogen 
= neighbours.get(0).getBonds(); - if (bondsFromNitrogen.size()==2){ - for (Bond bond : bondsFromNitrogen) { - if (bond.getBondStereo()!=null){ - return false; - } + if (atom.getIsotope() != null && atom.getIsotope() != 1){ + //deuterium/tritium + return false; + } + List neighbours = atom.getAtomNeighbours(); + int neighbourCount = neighbours.size(); + if (neighbourCount > 1){ + //bridging hydrogen + return false; + } + if (neighbourCount == 0){ + //just a hydrogen atom + return false; + } + + Atom neighbour = neighbours.get(0); + ChemEl chemEl = neighbour.getElement(); + if (chemEl == ChemEl.H || chemEl == ChemEl.R) { + //only connects to hydrogen or an R-group + return false; + } + if (chemEl == ChemEl.N){ + List bondsFromNitrogen = neighbour.getBonds(); + if (bondsFromNitrogen.size() == 2){ + for (Bond bond : bondsFromNitrogen) { + if (bond.getBondStereo() != null){ + //special case where hydrogen is connected to a nitrogen with imine double bond stereochemistry + return false; } } } @@ -201,7 +364,7 @@ Bond bond1 = atomRefs4[0].getBondToAtom(atomRefs4[1]); Bond bond2 = atomRefs4[2].getBondToAtom(atomRefs4[3]); if (bond1 ==null || bond2==null){ - throw new RuntimeException("Bondstereo described atoms that are not bonded:" +bondStereo.toCML().toXML()); + throw new RuntimeException("OPSIN Bug: Bondstereo described atoms that are not bonded"); } Atom bond1ToAtom = bondToNextAtomMap.get(bond1); Atom bond2ToAtom = bondToNextAtomMap.get(bond2); @@ -304,76 +467,89 @@ } } - private List createNonProtonAtomList(List atomList) { - List nonProtonAtomList = new ArrayList(); - for (Atom atom : atomList) { - if (atom.getProperty(Atom.VISITED)!=null){ - nonProtonAtomList.add(atom); - } - } - return nonProtonAtomList; - } - + + private static final TraversalState startBranch = new TraversalState(null, null, -1); + private static final TraversalState endBranch = new TraversalState(null, null, -1); + /** - * Generates the SMILES for the currentAtom and its bonds and then is 
called recursively to explore the atom's neighbours - * @param currentAtom - * @param previousAtom - * @param depth + * Generates the SMILES starting from the currentAtom, iteratively exploring + * in the same order as {@link SMILESWriter#traverseMolecule(Atom)} + * @param startingAtom */ - private void traverseSmiles(Atom currentAtom, Atom previousAtom, int depth){ - smilesBuilder.append(atomToSmiles(currentAtom, depth, previousAtom)); - List bonds = currentAtom.getBonds(); - LinkedList newlyAvailableClosureSymbols = null; - for (Bond bond : bonds) {//ring closures - Atom neighbour = bond.getOtherAtom(currentAtom); - Integer nDepth = neighbour.getProperty(Atom.VISITED); - if (nDepth!=null && nDepth<=depth && !neighbour.equals(previousAtom)){ - String closure = bondToClosureSymbolMap.get(bond); - smilesBuilder.append(closure); - if (newlyAvailableClosureSymbols == null){ - newlyAvailableClosureSymbols = new LinkedList(); - } - newlyAvailableClosureSymbols.addFirst(closure); + private void traverseSmiles(Atom startingAtom){ + Deque stack = new ArrayDeque(); + stack.add(new TraversalState(startingAtom, null, 0)); + while (!stack.isEmpty()){ + TraversalState currentstate = stack.removeLast(); + if (currentstate == startBranch){ + smilesBuilder.append('('); + continue; } - } - for (Bond bond : bonds) {//ring openings - Atom neighbour = bond.getOtherAtom(currentAtom); - Integer nDepth = neighbour.getProperty(Atom.VISITED); - if (nDepth!=null && nDepth > (depth +1)){ - String closure = availableClosureSymbols.removeFirst(); - bondToClosureSymbolMap.put(bond, closure); - smilesBuilder.append(bondToSmiles(bond)); - smilesBuilder.append(closure); + if (currentstate == endBranch){ + smilesBuilder.append(')'); + continue; } - } - if (newlyAvailableClosureSymbols != null){ - for (String closure : newlyAvailableClosureSymbols) { - availableClosureSymbols.addFirst(closure); + Atom currentAtom = currentstate.atom; + Bond bondtaken = currentstate.bondTaken; + if (bondtaken != 
null){ + smilesBuilder.append(bondToSmiles(bondtaken)); + } + int depth = currentstate.depth; + + smilesBuilder.append(atomToSmiles(currentAtom, depth, bondtaken)); + smilesOutputOrder.add(currentAtom); + List bonds = currentAtom.getBonds(); + List newlyAvailableClosureSymbols = null; + for (Bond bond : bonds) {//ring closures + if (bond.equals(bondtaken)) { + continue; + } + Atom neighbour = bond.getOtherAtom(currentAtom); + Integer nDepth = neighbour.getProperty(Atom.VISITED); + if (nDepth != null && nDepth <= depth){ + String closure = bondToClosureSymbolMap.get(bond); + smilesBuilder.append(closure); + if (newlyAvailableClosureSymbols == null){ + newlyAvailableClosureSymbols = new ArrayList(); + } + newlyAvailableClosureSymbols.add(closure); + } + } + for (Bond bond : bonds) {//ring openings + Atom neighbour = bond.getOtherAtom(currentAtom); + Integer nDepth = neighbour.getProperty(Atom.VISITED); + if (nDepth != null && nDepth > (depth +1)){ + String closure = availableClosureSymbols.removeFirst(); + bondToClosureSymbolMap.put(bond, closure); + smilesBuilder.append(bondToSmiles(bond)); + smilesBuilder.append(closure); + } } - } - // count outgoing edges - int count = 0; - for (Bond bond : bonds) { - Atom neighbour = bond.getOtherAtom(currentAtom); - Integer nDepth = neighbour.getProperty(Atom.VISITED); - if (nDepth!=null && nDepth==depth+1){ - count++; + if (newlyAvailableClosureSymbols != null) { + //By not immediately adding to availableClosureSymbols we avoid using the same digit + //to both close and open on the same atom + for (int i = newlyAvailableClosureSymbols.size() -1; i >=0; i--) { + availableClosureSymbols.addFirst(newlyAvailableClosureSymbols.get(i)); + } } - } - for (Bond bond : bonds) {//adjacent atoms which have not been previously written - Atom neighbour = bond.getOtherAtom(currentAtom); - Integer nDepth = neighbour.getProperty(Atom.VISITED); - if (nDepth!=null && nDepth==depth+1){ - if (count > 1){ - smilesBuilder.append('('); - } - 
smilesBuilder.append(bondToSmiles(bond)); - traverseSmiles(neighbour,currentAtom,depth+1); - if (count > 1){ - smilesBuilder.append(')'); - count--; + boolean seenFirstBranch = false; + for (int i = bonds.size() - 1; i >=0; i--) { + //adjacent atoms which have not been previously written + Bond bond = bonds.get(i); + Atom neighbour = bond.getOtherAtom(currentAtom); + Integer nDepth = neighbour.getProperty(Atom.VISITED); + if (nDepth != null && nDepth == depth + 1){ + if (!seenFirstBranch){ + stack.add(new TraversalState(neighbour, bond, depth + 1)); + seenFirstBranch = true; + } + else { + stack.add(endBranch); + stack.add(new TraversalState(neighbour, bond, depth + 1)); + stack.add(startBranch); + } } } } @@ -384,57 +560,61 @@ * Where possible square brackets are not included to give more readable SMILES * @param atom * @param depth - * @param previousAtom + * @param bondtaken * @return */ - private String atomToSmiles(Atom atom, int depth, Atom previousAtom) { + private String atomToSmiles(Atom atom, int depth, Bond bondtaken) { StringBuilder atomSmiles = new StringBuilder(); - int hydrogen =calculateNumberOfBondedExplicitHydrogen(atom); - boolean needsSquareBrackets = determineWhetherAtomNeedsSquareBrackets(atom, hydrogen); - if (needsSquareBrackets){ + int hydrogenCount = calculateNumberOfBondedExplicitHydrogen(atom); + boolean needsSquareBrackets = determineWhetherAtomNeedsSquareBrackets(atom, hydrogenCount); + if (needsSquareBrackets) { atomSmiles.append('['); } - if (atom.getIsotope()!=null){ + if (atom.getIsotope() != null) { atomSmiles.append(atom.getIsotope()); } - String elementSymbol =atom.getElement(); - if (atom.hasSpareValency()){//spare valency corresponds directly to lower case SMILES in OPSIN's SMILES reader - atomSmiles.append(elementSymbol.toLowerCase()); + ChemEl chemEl = atom.getElement(); + if (chemEl == ChemEl.R) {//used for polymers + atomSmiles.append('*'); } else{ - if (elementSymbol.equals("R")){//used for polymers - 
atomSmiles.append('*'); + if (atom.hasSpareValency()) {//spare valency corresponds directly to lower case SMILES in OPSIN's SMILES reader + atomSmiles.append(chemEl.toString().toLowerCase(Locale.ROOT)); } else{ - atomSmiles.append(elementSymbol); + atomSmiles.append(chemEl.toString()); } } - if (atom.getAtomParity()!=null){ - atomSmiles.append(atomParityToSmiles(atom, depth, previousAtom)); + if (atom.getAtomParity() != null){ + atomSmiles.append(atomParityToSmiles(atom, depth, bondtaken)); } - if (hydrogen !=0 && needsSquareBrackets && !elementSymbol.equals("H")){ + if (hydrogenCount != 0 && needsSquareBrackets && chemEl != ChemEl.H){ atomSmiles.append('H'); - if (hydrogen !=1){ - atomSmiles.append(String.valueOf(hydrogen)); + if (hydrogenCount != 1){ + atomSmiles.append(String.valueOf(hydrogenCount)); } } int charge = atom.getCharge(); if (charge != 0){ - if (charge==1){ + if (charge == 1){ atomSmiles.append('+'); } - else if (charge==-1){ + else if (charge == -1){ atomSmiles.append('-'); } else{ - if (charge>0){ + if (charge > 0){ atomSmiles.append('+'); } atomSmiles.append(charge); } } - //atomSmiles.append("[id:"+atom.getID()+"]"); - if (needsSquareBrackets){ + if (needsSquareBrackets) { + Integer atomClass = atom.getProperty(Atom.ATOM_CLASS); + if (atomClass != null) { + atomSmiles.append(':'); + atomSmiles.append(String.valueOf(atomClass)); + } atomSmiles.append(']'); } return atomSmiles.toString(); @@ -442,9 +622,9 @@ private int calculateNumberOfBondedExplicitHydrogen(Atom atom) { List neighbours = atom.getAtomNeighbours(); - int count =0; + int count = 0; for (Atom neighbour : neighbours) { - if (neighbour.getProperty(Atom.VISITED)==null){ + if (neighbour.getProperty(Atom.VISITED) == null){ count++; } } @@ -456,13 +636,13 @@ if (expectedValencies == null){ return true; } - if (atom.getCharge()!=0){ + if (atom.getCharge() != 0){ return true; } - if (atom.getIsotope()!=null){ + if (atom.getIsotope() != null){ return true; } - if 
(atom.getAtomParity()!=null){ + if (atom.getAtomParity() != null){ return true; } @@ -470,15 +650,15 @@ boolean valencyCanBeDescribedImplicitly = Arrays.binarySearch(expectedValencies, valency) >= 0; int targetImplicitValency =valency; if (valency > expectedValencies[expectedValencies.length-1]){ - valencyCanBeDescribedImplicitly =true; + valencyCanBeDescribedImplicitly = true; } if (!valencyCanBeDescribedImplicitly){ return true; } - int nonHydrogenValency = valency -hydrogenCount; + int nonHydrogenValency = valency - hydrogenCount; int implicitValencyThatWouldBeGenerated = nonHydrogenValency; - for (int i = expectedValencies.length-1; i>=0; i--) { + for (int i = expectedValencies.length - 1; i >= 0; i--) { if (expectedValencies[i] >= nonHydrogenValency){ implicitValencyThatWouldBeGenerated =expectedValencies[i]; } @@ -486,59 +666,64 @@ if (targetImplicitValency != implicitValencyThatWouldBeGenerated){ return true; } + if (atom.getProperty(Atom.ATOM_CLASS) != null) { + return true; + } return false; } - private String atomParityToSmiles(Atom currentAtom, int depth, Atom previousAtom) { + private String atomParityToSmiles(Atom currentAtom, int depth, Bond bondtaken) { AtomParity atomParity = currentAtom.getAtomParity(); - StringBuilder tetrahedralStereoChem = new StringBuilder(); Atom[] atomRefs4 = atomParity.getAtomRefs4().clone(); List atomrefs4Current = new ArrayList(); - List bonds = currentAtom.getBonds(); - for (Bond bond : bonds) {//previous atom - Atom neighbour = bond.getOtherAtom(currentAtom); - if (neighbour.getProperty(Atom.VISITED)!=null && neighbour.equals(previousAtom) ){ - atomrefs4Current.add(neighbour); - } + if (bondtaken != null) {//previous atom + Atom neighbour = bondtaken.getOtherAtom(currentAtom); + atomrefs4Current.add(neighbour); } + for (Atom atom : atomRefs4) {//lone pair as in tetrahedral sulfones if (atom.equals(currentAtom)){ atomrefs4Current.add(currentAtom); } } + + List bonds = currentAtom.getBonds(); for (Bond bond : bonds) 
{//implicit hydrogen Atom neighbour = bond.getOtherAtom(currentAtom); - if (neighbour.getProperty(Atom.VISITED)==null){ + if (neighbour.getProperty(Atom.VISITED) == null){ atomrefs4Current.add(currentAtom); } } for (Bond bond : bonds) {//ring closures + if (bond.equals(bondtaken)){ + continue; + } Atom neighbour = bond.getOtherAtom(currentAtom); - if (neighbour.getProperty(Atom.VISITED)==null){ + if (neighbour.getProperty(Atom.VISITED) == null){ continue; } - if (neighbour.getProperty(Atom.VISITED)<=depth && !neighbour.equals(previousAtom) ){ + if (neighbour.getProperty(Atom.VISITED) <= depth){ atomrefs4Current.add(neighbour); } } for (Bond bond : bonds) {//ring openings Atom neighbour = bond.getOtherAtom(currentAtom); - if (neighbour.getProperty(Atom.VISITED)==null){ + if (neighbour.getProperty(Atom.VISITED) == null){ continue; } - if (neighbour.getProperty(Atom.VISITED)> (depth +1)){ + if (neighbour.getProperty(Atom.VISITED) > (depth +1)){ atomrefs4Current.add(neighbour); } } for (Bond bond : bonds) {//next atom/s Atom neighbour = bond.getOtherAtom(currentAtom); - if (neighbour.getProperty(Atom.VISITED)==null){ + if (neighbour.getProperty(Atom.VISITED) == null){ continue; } - if (neighbour.getProperty(Atom.VISITED)==depth+1){ + if (neighbour.getProperty(Atom.VISITED) == depth + 1){ atomrefs4Current.add(neighbour); } } @@ -547,39 +732,38 @@ atomrefs4CurrentArr[i] = atomrefs4Current.get(i); } for (int i = 0; i < atomRefs4.length; i++) {//replace mentions of explicit hydrogen with the central atom the hydrogens are attached to, to be consistent with the SMILES representation - if (atomRefs4[i].getProperty(Atom.VISITED)==null){ + if (atomRefs4[i].getProperty(Atom.VISITED) == null){ atomRefs4[i] = currentAtom; } } boolean equivalent = StereochemistryHandler.checkEquivalencyOfAtomsRefs4AndParity(atomRefs4, atomParity.getParity(), atomrefs4CurrentArr, 1); if (equivalent){ - tetrahedralStereoChem.append("@@"); + return "@@"; } else{ - tetrahedralStereoChem.append("@"); + 
return "@"; } - return tetrahedralStereoChem.toString(); } /** * Generates the SMILES description of the bond - * In the case of cis/trans stereochemistry this relies on the assignDoubleBondStereochemistrySlashes + * In the case of cis/trans stereochemistry this relies on the {@link SMILESWriter#assignDoubleBondStereochemistrySlashes} * having been run to setup the smilesBondDirection attribute * @param bond * @return */ private String bondToSmiles(Bond bond){ - String bondSmiles =""; + String bondSmiles = ""; int bondOrder = bond.getOrder(); - if (bondOrder==2){ - bondSmiles ="="; + if (bondOrder == 2){ + bondSmiles = "="; } - else if (bondOrder==3){ - bondSmiles ="#"; + else if (bondOrder == 3){ + bondSmiles = "#"; } - else if (bond.getSmilesStereochemistry()!=null){ - if (bond.getSmilesStereochemistry()==SMILES_BOND_DIRECTION.RSLASH){ + else if (bond.getSmilesStereochemistry() != null){ + if (bond.getSmilesStereochemistry() == SMILES_BOND_DIRECTION.RSLASH){ bondSmiles ="/"; } else{ diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SortParses.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SortParses.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SortParses.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SortParses.java 2017-07-23 20:55:18.000000000 +0000 @@ -5,14 +5,12 @@ import java.util.Comparator; -import nu.xom.Element; - /** * Prefer non-substituent word rules to substituent word rule e.g. ethylene is C=C not -CC- * Prefer the parse with the least elements that have 0 children e.g. benzal beats benz al (1 childless element vs 2 childless elements) * Prefer less elements e.g. 
beats */ -class SortParses implements Comparator{ +class SortParses implements Comparator { public int compare(Element el1, Element el2){ boolean isSubstituent1 = WordRule.substituent.toString().equals(el1.getFirstChildElement(WORDRULE_EL).getAttributeValue(WORDRULE_ATR)); boolean isSubstituent2 = WordRule.substituent.toString().equals(el2.getFirstChildElement(WORDRULE_EL).getAttributeValue(WORDRULE_ATR)); @@ -23,8 +21,8 @@ return -1; } - int[] counts1 = XOMTools.countNumberOfElementsAndNumberOfChildLessElements(el1); - int[] counts2 = XOMTools.countNumberOfElementsAndNumberOfChildLessElements(el2); + int[] counts1 = OpsinTools.countNumberOfElementsAndNumberOfChildLessElements(el1); + int[] counts2 = OpsinTools.countNumberOfElementsAndNumberOfChildLessElements(el2); int childLessElementsInEl1 = counts1[1]; int childLessElementsInEl2 = counts2[1]; if ( childLessElementsInEl1> childLessElementsInEl2){ diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SSSRFinder.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SSSRFinder.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SSSRFinder.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SSSRFinder.java 2017-07-23 20:55:18.000000000 +0000 @@ -43,7 +43,7 @@ /** get list of rings. 
* not necessarily SSSR * @param atomSet - * @return list of rings + * @return list of rings */ private static List getRings(List atomSet ){ List ringList = new ArrayList(); @@ -72,8 +72,7 @@ List mergedBondSet = symmetricDifference (bondSet0, bondSet1); mergedBondSet.add(bond); - Ring ring = new Ring(mergedBondSet); - return ring; + return new Ring(mergedBondSet); } private static List getAncestors1(Atom atom, Map atomToParentMap){ @@ -100,9 +99,8 @@ usedAtoms.add(atom); atomToParentMap.put(atom, parentAtom); List ligandAtomList = atom.getAtomNeighbours(); - - for (int i = 0; i < ligandAtomList.size(); i++) { - Atom ligandAtom = ligandAtomList.get(i); + + for (Atom ligandAtom : ligandAtomList) { if (ligandAtom.equals(parentAtom)) { // skip existing bond } else if (usedAtoms.contains(ligandAtom)) { @@ -134,22 +132,20 @@ } - private static List symmetricDifference(List bondSet1, List bondSet2) { - List newBondSet = new ArrayList(); - - for (int i = 0; i < bondSet1.size(); i++) { - if (!bondSet2.contains(bondSet1.get(i))) { - newBondSet.add(bondSet1.get(i)); - } - } - for (int i = 0; i < bondSet2.size(); i++) { - Bond bond = bondSet2.get(i); - if (!bondSet1.contains(bond)) { - newBondSet.add(bond); - } - } + private static List symmetricDifference(List bondSet1, List bondSet2) { + List newBondSet = new ArrayList(); - return newBondSet; - } + for (Bond bond1 : bondSet1) { + if (!bondSet2.contains(bond1)) { + newBondSet.add(bond1); + } + } + for (Bond bond2 : bondSet2) { + if (!bondSet1.contains(bond2)) { + newBondSet.add(bond2); + } + } + return newBondSet; + } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StereoAnalyser.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StereoAnalyser.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StereoAnalyser.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StereoAnalyser.java 2017-07-23 20:55:18.000000000 +0000 
@@ -1,6 +1,8 @@ package uk.ac.cam.ch.wwmm.opsin; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -16,17 +18,18 @@ * */ class StereoAnalyser { + /** The atoms/bonds upon which this StereoAnalyser is operating */ + private final Collection atoms; + private final Collection bonds; + /** Maps each atom to its currently assigned colour. Eventually all atoms in non identical environments will have different colours. Higher is higher priority*/ private final Map mappingToColour; - /** Maps each atom to a list of of the colours of its neighbours*/ - private final Map> atomNeighbourColours; - - /** The molecule upon which this StereoAnalyser is operating */ - private final Fragment molecule; + /** Maps each atom to an array of the colours of its neighbours*/ + private final Map atomNeighbourColours; - private final AtomColourThenNeighbouringColoursComparator atomColourThenNeighbouringColoursComparator; - private final static AtomicNumberThenAtomicMassComparator atomicNumberThenAtomicMassComparator = new AtomicNumberThenAtomicMassComparator(); + private final AtomNeighbouringColoursComparator atomNeighbouringColoursComparator = new AtomNeighbouringColoursComparator(); + private static final AtomicNumberThenAtomicMassComparator atomicNumberThenAtomicMassComparator = new AtomicNumberThenAtomicMassComparator(); /** * Holds information about a tetrahedral stereocentre @@ -59,8 +62,8 @@ return trueStereoCentre; } - List getCipOrderedAtoms() { - List cipOrderedAtoms = new CipSequenceRules(stereoAtom).getNeighbouringAtomsInCIPOrder(); + List getCipOrderedAtoms() throws CipOrderingException { + List cipOrderedAtoms = new CipSequenceRules(stereoAtom).getNeighbouringAtomsInCipOrder(); if (cipOrderedAtoms.size()==3){//lone pair is the 4th. 
This is represented by the atom itself and is always the lowest priority cipOrderedAtoms.add(0, stereoAtom); } @@ -89,12 +92,13 @@ * other atom in bond * Highest CIP atom on other side * @return + * @throws CipOrderingException */ - List getOrderedStereoAtoms() { + List getOrderedStereoAtoms() throws CipOrderingException { Atom a1 = bond.getFromAtom(); Atom a2 = bond.getToAtom(); - List cipOrdered1 = new CipSequenceRules(a1).getNeighbouringAtomsInCIPOrderIgnoringGivenNeighbour(a2); - List cipOrdered2 = new CipSequenceRules(a2).getNeighbouringAtomsInCIPOrderIgnoringGivenNeighbour(a1); + List cipOrdered1 = new CipSequenceRules(a1).getNeighbouringAtomsInCipOrderIgnoringGivenNeighbour(a2); + List cipOrdered2 = new CipSequenceRules(a2).getNeighbouringAtomsInCipOrderIgnoringGivenNeighbour(a1); List stereoAtoms = new ArrayList(); stereoAtoms.add(cipOrdered1.get(cipOrdered1.size()-1));//highest CIP adjacent to a1 stereoAtoms.add(a1); @@ -117,8 +121,8 @@ } private static int compareAtomicNumberThenAtomicMass(Atom a, Atom b){ - int atomicNumber1 = AtomProperties.elementToAtomicNumber.get(a.getElement()); - int atomicNumber2 = AtomProperties.elementToAtomicNumber.get(b.getElement()); + int atomicNumber1 = a.getElement().ATOMIC_NUM; + int atomicNumber2 = b.getElement().ATOMIC_NUM; if (atomicNumber1 > atomicNumber2){ return 1; } @@ -145,47 +149,40 @@ } /** - * Initially sorts on the atoms' colour and if these are the same then - * sorts based on the list of colours for neighbouring atoms + * Sorts based on the list of colours for neighbouring atoms * e.g. 
[1,2] > [1,1] [1,1,3] > [2,2,2] [1,1,3] > [3] * @author dl387 * */ - private class AtomColourThenNeighbouringColoursComparator implements Comparator { + private class AtomNeighbouringColoursComparator implements Comparator { public int compare(Atom a, Atom b){ - int colour1 = mappingToColour.get(a); - int colour2 = mappingToColour.get(b); - if (colour1 > colour2){ - return 1; - } - else if (colour1 < colour2){ - return -1; - } - List colours1 = atomNeighbourColours.get(a); - List colours2 = atomNeighbourColours.get(b); + int[] colours1 = atomNeighbourColours.get(a); + int[] colours2 = atomNeighbourColours.get(b); - int colours1Size = colours1.size(); - int colours2Size = colours2.size(); - int differenceInSize = colours1Size - colours2Size; - int maxCommonColourSize = colours1Size > colours2Size ? colours2Size : colours1Size; + int colours1Size = colours1.length; + int colours2Size = colours2.length; + + int maxCommonColourSize = Math.min(colours1Size, colours2Size); for (int i = 1; i <= maxCommonColourSize; i++) { - int difference = colours1.get(colours1Size -i) - colours2.get(colours2Size -i); - if (difference >0){ + int difference = colours1[colours1Size - i] - colours2[colours2Size - i]; + if (difference > 0){ return 1; } if (difference < 0){ return -1; } } - if (differenceInSize >0){ + int differenceInSize = colours1Size - colours2Size; + if (differenceInSize > 0){ return 1; } - if (differenceInSize <0){ + if (differenceInSize < 0){ return -1; } return 0; } } + /** * Employs a derivative of the InChI algorithm to label which atoms are equivalent. 
* These labels can then be used by the findStereo(Atoms/Bonds) functions to find features that @@ -193,65 +190,79 @@ * @param molecule */ StereoAnalyser(Fragment molecule) { - this.molecule = molecule; - atomColourThenNeighbouringColoursComparator = new AtomColourThenNeighbouringColoursComparator(); - addGhostAtoms(); - List atomList = molecule.getAtomList(); - mappingToColour = new HashMap(atomList.size()); - atomNeighbourColours = new HashMap>(atomList.size()); - Collections.sort(atomList, atomicNumberThenAtomicMassComparator); - populateColoursByAtomicNumberAndMass(atomList); - + this (molecule.getAtomList(), molecule.getBondSet()); + } + + /** + * Employs a derivative of the InChI algorithm to label which atoms are equivalent. + * These labels can then be used by the findStereo(Atoms/Bonds) functions to find features that + * can possess stereoChemistry + * NOTE: All bonds of every atom must be in the set of bonds, no atom may have a bond to an atom not in the list + * @param atoms + * @param bonds + */ + StereoAnalyser(Collection atoms, Collection bonds) { + this.atoms = atoms; + this.bonds = bonds; + List ghostAtoms = addGhostAtoms(); + List atomsToSort = new ArrayList(atoms); + atomsToSort.addAll(ghostAtoms); + mappingToColour = new HashMap(atomsToSort.size()); + atomNeighbourColours = new HashMap(atomsToSort.size()); + Collections.sort(atomsToSort, atomicNumberThenAtomicMassComparator); + List> groupsByColour = populateColoursByAtomicNumberAndMass(atomsToSort); boolean changeFound = true; while(changeFound){ - for (Atom atom : atomList) { - List neighbourColours = findColourOfNeighbours(atom); - atomNeighbourColours.put(atom, neighbourColours); + for (List groupWithAColour : groupsByColour) { + for (Atom atom : groupWithAColour) { + int[] neighbourColours = findColourOfNeighbours(atom); + atomNeighbourColours.put(atom, neighbourColours); + } } - Collections.sort(atomList, atomColourThenNeighbouringColoursComparator); - changeFound = 
populateColoursAndReportIfColoursWereChanged(atomList); + List> updatedGroupsByColour = new ArrayList>(); + changeFound = populateColoursAndReportIfColoursWereChanged(groupsByColour, updatedGroupsByColour); + groupsByColour = updatedGroupsByColour; } - removeGhostAtoms(); + removeGhostAtoms(ghostAtoms); } /** - * Adds "ghost" atoms in the sam ways as the CIP rules for handling double bonds + * Adds "ghost" atoms in the same way as the CIP rules for handling double bonds * e.g. C=C --> C(G)=C(G) where ghost is a carbon with no hydrogen bonded to it + * @return The ghost atoms created */ - private void addGhostAtoms() { - Set bonds = molecule.getBondSet(); - int ghostIdCounter = -1; + private List addGhostAtoms() { + List ghostAtoms = new ArrayList(); for (Bond bond : bonds) { int bondOrder = bond.getOrder(); - for (int i = bondOrder; i >1; i--) { - Atom fromAtom =bond.getFromAtom(); - Atom toAtom =bond.getToAtom(); + for (int i = bondOrder; i > 1; i--) { + Atom fromAtom = bond.getFromAtom(); + Atom toAtom = bond.getToAtom(); - Atom ghost1 = new Atom(ghostIdCounter--, fromAtom.getElement(), molecule); + Atom ghost1 = new Atom(fromAtom.getElement()); Bond b1 = new Bond(ghost1, toAtom, 1); toAtom.addBond(b1); ghost1.addBond(b1); - molecule.addAtom(ghost1); - Atom ghost2 = new Atom(ghostIdCounter--, toAtom.getElement(), molecule); + ghostAtoms.add(ghost1); + + Atom ghost2 = new Atom(toAtom.getElement()); Bond b2 = new Bond(ghost2, fromAtom, 1); fromAtom.addBond(b2); ghost2.addBond(b2); - molecule.addAtom(ghost2); + ghostAtoms.add(ghost2); } } + return ghostAtoms; } /** * Removes the ghost atoms added by addGhostAtoms + * @param ghostAtoms */ - private void removeGhostAtoms() { - List atomList = molecule.getAtomList(); - for (Atom atom : atomList) { - if (atom.getID() < 0){ - Atom adjacentAtom = atom.getAtomNeighbours().get(0); - adjacentAtom.removeBond(atom.getFirstBond()); - molecule.removeAtom(atom); - } + private void removeGhostAtoms(List ghostAtoms) { + for (Atom 
atom : ghostAtoms) { + Bond b = atom.getFirstBond(); + b.getOtherAtom(atom).removeBond(b); } } @@ -260,79 +271,93 @@ * Takes a list of atoms sorted by atomic number/mass * and populates the mappingToColour map * @param atomList + * @return */ - private void populateColoursByAtomicNumberAndMass(List atomList) { - Atom lastAtom = null; + private List> populateColoursByAtomicNumberAndMass(List atomList) { + List> groupsByColour = new ArrayList>(); + Atom previousAtom = null; List atomsOfThisColour = new ArrayList(); int atomsSeen = 0; for (Atom atom : atomList) { - if (lastAtom!=null && compareAtomicNumberThenAtomicMass(lastAtom, atom)!=0){ - for (Atom a2 : atomsOfThisColour) { - mappingToColour.put(a2, atomsSeen); + if (previousAtom != null && compareAtomicNumberThenAtomicMass(previousAtom, atom) != 0){ + for (Atom atomOfthisColour : atomsOfThisColour) { + mappingToColour.put(atomOfthisColour, atomsSeen); } + groupsByColour.add(atomsOfThisColour); atomsOfThisColour = new ArrayList(); } - lastAtom = atom; + previousAtom = atom; atomsOfThisColour.add(atom); atomsSeen++; } if (!atomsOfThisColour.isEmpty()){ - for (Atom a2 : atomsOfThisColour) { - mappingToColour.put(a2, atomsSeen); + for (Atom atomOfThisColour : atomsOfThisColour) { + mappingToColour.put(atomOfThisColour, atomsSeen); } + groupsByColour.add(atomsOfThisColour); } + return groupsByColour; } - + /** - * Takes a list of atoms sorted by colour/the colour of their neighbours + * Takes the lists of atoms pre-grouped by colour and sorts each by its neighbours colours + * The updatedGroupsByColour is populated with those for which this process caused a change * and populates the mappingToColour map - * Returns whether mappingToColour was changed - * @param atomList + * Returns whether mappingToColour was changed + * @param groupsByColour + * @param updatedGroupsByColour * @return boolean Whether mappingToColour was changed */ - private boolean populateColoursAndReportIfColoursWereChanged(List atomList) { - Atom 
previousAtom = atomList.get(0); - List atomsOfThisColour = new ArrayList(); - int atomsSeen =0; + private boolean populateColoursAndReportIfColoursWereChanged(List> groupsByColour, List> updatedGroupsByColour) { boolean changeFound = false; - for (Atom atom : atomList) { - if (atomColourThenNeighbouringColoursComparator.compare(previousAtom, atom)!=0){ - for (Atom atomOfThisColour : atomsOfThisColour) { - if (!changeFound && atomsSeen != mappingToColour.get(atomOfThisColour)){ - changeFound =true; + int atomsSeen = 0; + for (List groupWithAColour : groupsByColour) { + Collections.sort(groupWithAColour, atomNeighbouringColoursComparator); + Atom previousAtom = null; + List atomsOfThisColour = new ArrayList(); + for (Atom atom : groupWithAColour) { + if (previousAtom != null && atomNeighbouringColoursComparator.compare(previousAtom, atom) != 0){ + for (Atom atomOfThisColour : atomsOfThisColour) { + if (!changeFound && atomsSeen != mappingToColour.get(atomOfThisColour)){ + changeFound = true; + } + mappingToColour.put(atomOfThisColour, atomsSeen); } - mappingToColour.put(atomOfThisColour, atomsSeen); + updatedGroupsByColour.add(atomsOfThisColour); + atomsOfThisColour = new ArrayList(); } previousAtom = atom; - atomsOfThisColour = new ArrayList(); + atomsOfThisColour.add(atom); + atomsSeen++; } - atomsOfThisColour.add(atom); - atomsSeen++; - } - if (!atomsOfThisColour.isEmpty()){ - for (Atom atomOfThisColour : atomsOfThisColour) { - if (!changeFound && atomsSeen != mappingToColour.get(atomOfThisColour)){ - changeFound =true; + if (!atomsOfThisColour.isEmpty()){ + for (Atom atomOfThisColour : atomsOfThisColour) { + if (!changeFound && atomsSeen != mappingToColour.get(atomOfThisColour)){ + changeFound = true; + } + mappingToColour.put(atomOfThisColour, atomsSeen); } - mappingToColour.put(atomOfThisColour, atomsSeen); + updatedGroupsByColour.add(atomsOfThisColour); } } return changeFound; } /** - * Produces a sorted (low to high) list of the colour of the atoms 
surrounding a given atom + * Produces a sorted (low to high) array of the colour of the atoms surrounding a given atom * @param atom - * @return List colourOfAdjacentAtoms + * @return int[] colourOfAdjacentAtoms */ - private List findColourOfNeighbours(Atom atom) { - List colourOfAdjacentAtoms = new ArrayList(); + private int[] findColourOfNeighbours(Atom atom) { List bonds = atom.getBonds(); - for (Bond bond : bonds) { - Atom otherAtom = bond.getFromAtom() == atom ? bond.getToAtom() : bond.getFromAtom(); - colourOfAdjacentAtoms.add(mappingToColour.get(otherAtom)); + int bondCount = bonds.size(); + int[] colourOfAdjacentAtoms = new int[bondCount]; + for (int i = 0; i < bondCount; i++) { + Bond bond = bonds.get(i); + Atom otherAtom = bond.getOtherAtom(atom); + colourOfAdjacentAtoms[i] = mappingToColour.get(otherAtom); } - Collections.sort(colourOfAdjacentAtoms);//sort such that this goes from low to high + Arrays.sort(colourOfAdjacentAtoms);//sort such that this goes from low to high return colourOfAdjacentAtoms; } @@ -367,9 +392,8 @@ * @return */ private List getPotentialStereoCentres() { - List atomList = molecule.getAtomList(); List potentialStereoAtoms = new ArrayList(); - for (Atom atom : atomList) { + for (Atom atom : atoms) { if (isPossiblyStereogenic(atom)){ potentialStereoAtoms.add(atom); } @@ -388,7 +412,7 @@ return false; } int[] colours = new int[4]; - for (int i = neighbours.size() -1 ; i >=0; i--) { + for (int i = neighbours.size() - 1 ; i >=0; i--) { colours[i] = mappingToColour.get(neighbours.get(i)); } @@ -415,9 +439,9 @@ List paraStereoCentres = new ArrayList(); for (Atom potentialStereoAtom : potentialStereoAtoms) { List neighbours = potentialStereoAtom.getAtomNeighbours(); - if (neighbours.size()==4){ + if (neighbours.size() == 4){ int[] colours = new int[4]; - for (int i = neighbours.size() -1 ; i >=0; i--) { + for (int i = neighbours.size() - 1 ; i >=0; i--) { colours[i] = mappingToColour.get(neighbours.get(i)); } //find pairs of 
constitutionally identical substituents @@ -506,20 +530,20 @@ */ static boolean isKnownPotentiallyStereogenic(Atom atom) { List neighbours = atom.getAtomNeighbours(); - String element = atom.getElement(); + ChemEl chemEl = atom.getElement(); if (neighbours.size() == 4){ - if (element.equals("B") || element.equals("C") || element.equals("Si") || element.equals("Ge") || - element.equals("Sn") || element.equals("N")|| element.equals("P") || element.equals("As") || - element.equals("S") || element.equals("Se")){ + if (chemEl == ChemEl.B || chemEl == ChemEl.C || chemEl == ChemEl.Si || chemEl == ChemEl.Ge || + chemEl == ChemEl.Sn || chemEl == ChemEl.N || chemEl == ChemEl.P || chemEl == ChemEl.As || + chemEl == ChemEl.S || chemEl == ChemEl.Se){ return true; } } else if (neighbours.size() ==3){ - if ((element.equals("S") || element.equals("Se")) && (atom.getIncomingValency()==4 || (atom.getCharge() ==1 && atom.getIncomingValency()==3))){ + if ((chemEl == ChemEl.S || chemEl == ChemEl.Se) && (atom.getIncomingValency()==4 || (atom.getCharge() ==1 && atom.getIncomingValency()==3))){ //tetrahedral sulfur/selenium - 3 bonds and the lone pair return true; } - if (element.equals("N") && atom.getCharge() ==0 && atom.getIncomingValency()==3 && atomsContainABondBetweenThemselves(neighbours)){ + if (chemEl == ChemEl.N && atom.getCharge() ==0 && atom.getIncomingValency()==3 && atomsContainABondBetweenThemselves(neighbours)){ return true; //nitrogen where two attached atoms are connected together } @@ -539,24 +563,24 @@ } static boolean isAchiralDueToResonanceOrTautomerism(Atom atom) { - if(atom.getElement().equals("N") || - atom.getElement().equals("P") || - atom.getElement().equals("As") || - atom.getElement().equals("S") || - atom.getElement().equals("Se")){ + ChemEl chemEl = atom.getElement(); + if(chemEl == ChemEl.N || + chemEl == ChemEl.P || + chemEl == ChemEl.As || + chemEl == ChemEl.S || + chemEl == ChemEl.Se) { List neighbours = atom.getAtomNeighbours(); Set 
resonanceAndTautomerismAtomicElementPlusIsotopes = new HashSet(); for (Atom neighbour : neighbours) { - String element = neighbour.getElement(); - if ((element.equals("O") || element.equals("S") || element.equals("Se") - || element.equals("Te") || element.equals("N")) + ChemEl neighbourChemEl = neighbour.getElement(); + if ((neighbourChemEl.isChalcogen() || neighbourChemEl == ChemEl.N) && isOnlyBondedToHydrogensOtherThanGivenAtom(neighbour, atom)){ - if (resonanceAndTautomerismAtomicElementPlusIsotopes.contains(element + atom.getIsotope())){ + if (resonanceAndTautomerismAtomicElementPlusIsotopes.contains(neighbourChemEl.toString() + atom.getIsotope())){ return true; } - resonanceAndTautomerismAtomicElementPlusIsotopes.add(element + atom.getIsotope()); + resonanceAndTautomerismAtomicElementPlusIsotopes.add(neighbourChemEl.toString() + atom.getIsotope()); } - if (element.equals("H") && neighbour.getBonds().size()==1){ + if (neighbourChemEl == ChemEl.H && neighbour.getBondCount()==1){ //terminal H atom neighbour return true; } @@ -570,7 +594,7 @@ if (neighbour.equals(attachedNonHydrogen)){ continue; } - if (!neighbour.getElement().equals("H")){ + if (neighbour.getElement() != ChemEl.H){ return false; } } @@ -584,21 +608,20 @@ * @return */ List findStereoBonds() { - Set bondSet =molecule.getBondSet(); List stereoBonds = new ArrayList(); - for (Bond bond : bondSet) { + for (Bond bond : bonds) { if (bond.getOrder()==2){ Atom a1 = bond.getFromAtom(); List neighbours1 = a1.getAtomNeighbours(); neighbours1.remove(bond.getToAtom()); - if (neighbours1.size()==2 || (neighbours1.size()==1 && a1.getElement().equals("N") && a1.getIncomingValency()==3 && a1.getCharge()==0)){ + if (neighbours1.size()==2 || (neighbours1.size()==1 && a1.getElement() == ChemEl.N && a1.getIncomingValency()==3 && a1.getCharge()==0)){ if (neighbours1.size()==2 && mappingToColour.get(neighbours1.get(0)).equals(mappingToColour.get(neighbours1.get(1)))){ continue; } Atom a2 = bond.getToAtom(); List 
neighbours2 = a2.getAtomNeighbours(); neighbours2.remove(bond.getFromAtom()); - if (neighbours2.size()==2 || (neighbours2.size()==1 && a2.getElement().equals("N") && a2.getIncomingValency()==3 && a2.getCharge()==0)){ + if (neighbours2.size()==2 || (neighbours2.size()==1 && a2.getElement() == ChemEl.N && a2.getIncomingValency()==3 && a2.getCharge()==0)){ if (neighbours2.size()==2 && mappingToColour.get(neighbours2.get(0)).equals(mappingToColour.get(neighbours2.get(1)))){ continue; } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StereochemistryHandler.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StereochemistryHandler.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StereochemistryHandler.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StereochemistryHandler.java 2017-07-23 20:55:18.000000000 +0000 @@ -5,19 +5,17 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.Map.Entry; import uk.ac.cam.ch.wwmm.opsin.BondStereo.BondStereoValue; +import uk.ac.cam.ch.wwmm.opsin.OpsinWarning.OpsinWarningType; import uk.ac.cam.ch.wwmm.opsin.StereoAnalyser.StereoBond; import uk.ac.cam.ch.wwmm.opsin.StereoAnalyser.StereoCentre; - -import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; -import nu.xom.Element; - /** * Identifies stereocentres, assigns stereochemistry elements to them and then uses the CIP rules to calculate appropriates atomParity/bondstereo tags * @author dl387 @@ -43,9 +41,8 @@ * Processes and assigns stereochemistry elements to appropriate fragments * @param stereoChemistryEls * @throws StructureBuildingException - * @throws StereochemistryException */ - void applyStereochemicalElements(List stereoChemistryEls) throws StructureBuildingException, StereochemistryException { + void 
applyStereochemicalElements(List stereoChemistryEls) throws StructureBuildingException { List locantedStereoChemistryEls = new ArrayList(); List unlocantedStereoChemistryEls = new ArrayList(); List carbohydrateStereoChemistryEls = new ArrayList(); @@ -62,13 +59,33 @@ } //perform locanted before unlocanted to avoid unlocanted elements using the stereocentres a locanted element refers to for (Element stereochemistryEl : locantedStereoChemistryEls) { - matchStereochemistryToAtomsAndBonds(stereochemistryEl); + try { + matchStereochemistryToAtomsAndBonds(stereochemistryEl); + } + catch (StereochemistryException e) { + if (state.n2sConfig.warnRatherThanFailOnUninterpretableStereochemistry()){ + state.addWarning(OpsinWarningType.STEREOCHEMISTRY_IGNORED, e.getMessage()); + } + else{ + throw e; + } + } } if (!carbohydrateStereoChemistryEls.isEmpty()){ processCarbohydrateStereochemistry(carbohydrateStereoChemistryEls); } for (Element stereochemistryEl : unlocantedStereoChemistryEls) { - matchStereochemistryToAtomsAndBonds(stereochemistryEl); + try { + matchStereochemistryToAtomsAndBonds(stereochemistryEl); + } + catch (StereochemistryException e) { + if (state.n2sConfig.warnRatherThanFailOnUninterpretableStereochemistry()){ + state.addWarning(OpsinWarningType.STEREOCHEMISTRY_IGNORED, e.getMessage()); + } + else{ + throw e; + } + } } } @@ -111,10 +128,25 @@ } } else if (stereoChemistryType.equals(ALPHA_OR_BETA_TYPE_VAL)){ - assignAlphaBetaStereochem(stereoChemistryEl); + assignAlphaBetaXiStereochem(stereoChemistryEl); + } + else if (stereoChemistryType.equals(DLSTEREOCHEMISTRY_TYPE_VAL)){ + assignDlStereochem(stereoChemistryEl); + } + else if (stereoChemistryType.equals(ENDO_EXO_SYN_ANTI_TYPE_VAL)){ + throw new StereochemistryException(stereoChemistryType + " stereochemistry is not currently interpretable by OPSIN"); + } + else if (stereoChemistryType.equals(RELATIVECISTRANS_TYPE_VAL)){ + throw new StereochemistryException(stereoChemistryType + " stereochemistry is not 
currently interpretable by OPSIN"); + } + else if (stereoChemistryType.equals(AXIAL_TYPE_VAL)){ + throw new StereochemistryException(stereoChemistryType + " stereochemistry is not currently interpretable by OPSIN"); + } + else if (stereoChemistryType.equals(OPTICALROTATION_TYPE_VAL)){ + state.addWarning(OpsinWarningType.STEREOCHEMISTRY_IGNORED, "Optical rotation cannot be algorithmically used to assign stereochemistry. This term was ignored: " + stereoChemistryEl.getValue()); } else{ - throw new StructureBuildingException("Unsupported stereochemistry type: " +stereoChemistryType); + throw new StructureBuildingException("Unexpected stereochemistry type: " +stereoChemistryType); } stereoChemistryEl.detach(); } @@ -128,7 +160,7 @@ private void processCarbohydrateStereochemistry(List carbohydrateStereoChemistryEls) throws StructureBuildingException { Map> groupToStereochemEls = new HashMap>(); for (Element carbohydrateStereoChemistryEl : carbohydrateStereoChemistryEls) { - Element nextGroup = (Element) XOMTools.getNextSibling(carbohydrateStereoChemistryEl, GROUP_EL); + Element nextGroup = OpsinTools.getNextSibling(carbohydrateStereoChemistryEl, GROUP_EL); if (nextGroup ==null || (!SYSTEMATICCARBOHYDRATESTEMALDOSE_SUBTYPE_VAL.equals(nextGroup.getAttributeValue(SUBTYPE_ATR)) && !SYSTEMATICCARBOHYDRATESTEMKETOSE_SUBTYPE_VAL.equals(nextGroup.getAttributeValue(SUBTYPE_ATR)))){ throw new RuntimeException("OPSIN bug: Could not find carbohydrate chain stem to apply stereochemistry to"); @@ -136,8 +168,7 @@ if (groupToStereochemEls.get(nextGroup)==null){ groupToStereochemEls.put(nextGroup, new ArrayList()); } - List stereochemistryEls = groupToStereochemEls.get(nextGroup); - stereochemistryEls.add(carbohydrateStereoChemistryEl); + groupToStereochemEls.get(nextGroup).add(carbohydrateStereoChemistryEl); } for (Entry> entry : groupToStereochemEls.entrySet()) { assignCarbohydratePrefixStereochem(entry.getKey(), entry.getValue()); @@ -151,62 +182,31 @@ * @throws 
StereochemistryException */ private void assignStereoCentre(Element stereoChemistryEl) throws StructureBuildingException, StereochemistryException { - //generally the LAST group in this list will be the appropriate groups e.g. (5S)-5-ethyl-6-methylheptane where the heptane is the appropriate group + //generally the LAST group in this list will be the appropriate group e.g. (5S)-5-ethyl-6-methylheptane where the heptane is the appropriate group //we use the same algorithm as for unlocanted substitution so as to deprecate assignment into brackets - Element parentSubBracketOrRoot = (Element) stereoChemistryEl.getParent(); - List possibleFragments = StructureBuildingMethods.findAlternativeFragments(state, parentSubBracketOrRoot); - List adjacentGroupEls = XOMTools.getDescendantElementsWithTagName(parentSubBracketOrRoot, GROUP_EL); + Element parentSubBracketOrRoot = stereoChemistryEl.getParent(); + List possibleFragments = StructureBuildingMethods.findAlternativeFragments(parentSubBracketOrRoot); + List adjacentGroupEls = OpsinTools.getDescendantElementsWithTagName(parentSubBracketOrRoot, GROUP_EL); for (int i = adjacentGroupEls.size()-1; i >=0; i--) { - possibleFragments.add(state.xmlFragmentMap.get(adjacentGroupEls.get(i))); + possibleFragments.add(adjacentGroupEls.get(i).getFrag()); } String locant = stereoChemistryEl.getAttributeValue(LOCANT_ATR); String rOrS = stereoChemistryEl.getAttributeValue(VALUE_ATR); for (Fragment fragment : possibleFragments) { - if (locant ==null){//undefined locant - List atomList = fragment.getAtomList(); - for (Atom potentialStereoAtom : atomList) { - if (notExplicitlyDefinedStereoCentreMap.containsKey(potentialStereoAtom)){ - applyStereoChemistryToStereoCentre(potentialStereoAtom, notExplicitlyDefinedStereoCentreMap.get(potentialStereoAtom), rOrS); - notExplicitlyDefinedStereoCentreMap.remove(potentialStereoAtom); - return; - } - } - } - else{ - Atom potentialStereoAtom = fragment.getAtomByLocant(locant); - if (potentialStereoAtom 
!=null && notExplicitlyDefinedStereoCentreMap.containsKey(potentialStereoAtom)){ - applyStereoChemistryToStereoCentre(potentialStereoAtom, notExplicitlyDefinedStereoCentreMap.get(potentialStereoAtom), rOrS); - notExplicitlyDefinedStereoCentreMap.remove(potentialStereoAtom); - return; - } + if (attemptAssignmentOfStereoCentreToFragment(fragment, rOrS, locant)) { + return; } } - Element possibleWordParent = (Element) parentSubBracketOrRoot.getParent(); - if (possibleWordParent.getLocalName().equals(WORD_EL) && possibleWordParent.getAttributeValue(TYPE_ATR).equals(WordType.substituent.toString())){ + Element possibleWordParent = parentSubBracketOrRoot.getParent(); + if (possibleWordParent.getName().equals(WORD_EL) && possibleWordParent.getChild(0).equals(parentSubBracketOrRoot)){ //something like (3R,4R,5R)-ethyl 4-acetamido-5-amino-3-(pentan-3-yloxy)cyclohex-1-enecarboxylate - //I think this is a violation of the IUPAC rules...but anyway... - List words = XOMTools.getChildElementsWithTagNameAndAttribute(((Element)possibleWordParent.getParent()), WORD_EL, TYPE_ATR, WordType.full.toString()); + //i.e. 
the stereochemistry is in a different word to what it is applied to + List words = OpsinTools.getNextSiblingsOfType(possibleWordParent, WORD_EL); for (Element word : words) { - List possibleGroups = XOMTools.getDescendantElementsWithTagName(word, GROUP_EL); + List possibleGroups = OpsinTools.getDescendantElementsWithTagName(word, GROUP_EL); for (int i = possibleGroups.size()-1; i >=0; i--) { - Fragment correspondingFrag = state.xmlFragmentMap.get(possibleGroups.get(i)); - if (locant == null){//undefined locant - List atomList = correspondingFrag.getAtomList(); - for (Atom potentialStereoAtom : atomList) { - if (notExplicitlyDefinedStereoCentreMap.containsKey(potentialStereoAtom)){ - applyStereoChemistryToStereoCentre(potentialStereoAtom, notExplicitlyDefinedStereoCentreMap.get(potentialStereoAtom), rOrS); - notExplicitlyDefinedStereoCentreMap.remove(potentialStereoAtom); - return; - } - } - } - else{ - Atom potentialStereoAtom = correspondingFrag.getAtomByLocant(locant); - if (potentialStereoAtom !=null && notExplicitlyDefinedStereoCentreMap.containsKey(potentialStereoAtom)){ - applyStereoChemistryToStereoCentre(potentialStereoAtom, notExplicitlyDefinedStereoCentreMap.get(potentialStereoAtom), rOrS); - notExplicitlyDefinedStereoCentreMap.remove(potentialStereoAtom); - return; - } + if (attemptAssignmentOfStereoCentreToFragment(possibleGroups.get(i).getFrag(), rOrS, locant)) { + return; } } } @@ -215,14 +215,37 @@ } + private boolean attemptAssignmentOfStereoCentreToFragment(Fragment fragment, String rOrS, String locant) throws StereochemistryException, StructureBuildingException { + if (locant == null) {//undefined locant + List atomList = fragment.getAtomList(); + for (Atom potentialStereoAtom : atomList) { + if (notExplicitlyDefinedStereoCentreMap.containsKey(potentialStereoAtom)){ + applyStereoChemistryToStereoCentre(potentialStereoAtom, notExplicitlyDefinedStereoCentreMap.get(potentialStereoAtom), rOrS); + 
notExplicitlyDefinedStereoCentreMap.remove(potentialStereoAtom); + return true; + } + } + } + else{ + Atom potentialStereoAtom = fragment.getAtomByLocant(locant); + if (potentialStereoAtom !=null && notExplicitlyDefinedStereoCentreMap.containsKey(potentialStereoAtom)){ + applyStereoChemistryToStereoCentre(potentialStereoAtom, notExplicitlyDefinedStereoCentreMap.get(potentialStereoAtom), rOrS); + notExplicitlyDefinedStereoCentreMap.remove(potentialStereoAtom); + return true; + } + } + return false; + } + /** * Assigns atom parity to the given atom in accordance with the CIP rules * @param atom The stereoAtom * @param stereoCentre * @param rOrS The description given in the name * @throws StructureBuildingException + * @throws StereochemistryException */ - private void applyStereoChemistryToStereoCentre(Atom atom, StereoCentre stereoCentre, String rOrS) throws StructureBuildingException { + private void applyStereoChemistryToStereoCentre(Atom atom, StereoCentre stereoCentre, String rOrS) throws StructureBuildingException, StereochemistryException { List cipOrderedAtoms =stereoCentre.getCipOrderedAtoms(); if (cipOrderedAtoms.size()!=4){ throw new StructureBuildingException("Only tetrahedral chirality is currently supported"); @@ -238,6 +261,9 @@ else if (rOrS.equals("S")){ atom.setAtomParity(atomRefs4, 1); } + else if (rOrS.equals("RS") || rOrS.equals("SR")){ + atom.setAtomParity(null); + } else{ throw new StructureBuildingException("Unexpected stereochemistry type: " + rOrS); } @@ -253,11 +279,11 @@ private void assignStereoBond(Element stereoChemistryEl) throws StructureBuildingException, StereochemistryException { //generally the LAST group in this list will be the appropriate groups e.g. 
(2Z)-5-ethyl-6-methylhex-2-ene where the hex-2-ene is the appropriate group //we use the same algorithm as for unlocanted substitution so as to deprecate assignment into brackets - Element parentSubBracketOrRoot = (Element) stereoChemistryEl.getParent(); - List possibleFragments = StructureBuildingMethods.findAlternativeFragments(state, parentSubBracketOrRoot); - List adjacentGroupEls = XOMTools.getDescendantElementsWithTagName(parentSubBracketOrRoot, GROUP_EL); + Element parentSubBracketOrRoot = stereoChemistryEl.getParent(); + List possibleFragments = StructureBuildingMethods.findAlternativeFragments(parentSubBracketOrRoot); + List adjacentGroupEls = OpsinTools.getDescendantElementsWithTagName(parentSubBracketOrRoot, GROUP_EL); for (int i = adjacentGroupEls.size()-1; i >=0; i--) { - possibleFragments.add(state.xmlFragmentMap.get(adjacentGroupEls.get(i))); + possibleFragments.add(adjacentGroupEls.get(i).getFrag()); } String locant = stereoChemistryEl.getAttributeValue(LOCANT_ATR); String eOrZ = stereoChemistryEl.getAttributeValue(VALUE_ATR); @@ -276,76 +302,20 @@ } } for (Fragment fragment : possibleFragments) { - if (locant == null){//undefined locant - Set bondSet = fragment.getBondSet(); - for (Bond potentialBond : bondSet) { - if (notExplicitlyDefinedStereoBondMap.containsKey(potentialBond) && (!isCisTrans || cisTransUnambiguousOnBond(potentialBond))){ - applyStereoChemistryToStereoBond(potentialBond, notExplicitlyDefinedStereoBondMap.get(potentialBond), eOrZ); - notExplicitlyDefinedStereoBondMap.remove(potentialBond); - return; - } - } - List sortedInterFragmentBonds = sortInterFragmentBonds(state.fragManager.getInterFragmentBonds(fragment), fragment); - for (Bond potentialBond : sortedInterFragmentBonds) { - if (notExplicitlyDefinedStereoBondMap.containsKey(potentialBond) && (!isCisTrans || cisTransUnambiguousOnBond(potentialBond))){ - applyStereoChemistryToStereoBond(potentialBond, notExplicitlyDefinedStereoBondMap.get(potentialBond), eOrZ); - 
notExplicitlyDefinedStereoBondMap.remove(potentialBond); - return; - } - } - } - else{ - Atom firstAtomInBond = fragment.getAtomByLocant(locant); - if (firstAtomInBond !=null){ - List bonds = firstAtomInBond.getBonds(); - for (Bond potentialBond : bonds) { - if (notExplicitlyDefinedStereoBondMap.containsKey(potentialBond) && (!isCisTrans || cisTransUnambiguousOnBond(potentialBond))){ - applyStereoChemistryToStereoBond(potentialBond, notExplicitlyDefinedStereoBondMap.get(potentialBond), eOrZ); - notExplicitlyDefinedStereoBondMap.remove(potentialBond); - return; - } - } - } + if (attemptAssignmentOfStereoBondToFragment(fragment, eOrZ, locant, isCisTrans)) { + return; } } - Element possibleWordParent = (Element) parentSubBracketOrRoot.getParent(); - if (possibleWordParent.getLocalName().equals(WORD_EL) && possibleWordParent.getAttributeValue(TYPE_ATR).equals(WordType.substituent.toString())){ + Element possibleWordParent = parentSubBracketOrRoot.getParent(); + if (possibleWordParent.getName().equals(WORD_EL) && possibleWordParent.getAttributeValue(TYPE_ATR).equals(WordType.substituent.toString())){ //the element is in front of a substituent and may refer to the full group - List words = XOMTools.getChildElementsWithTagNameAndAttribute(((Element)possibleWordParent.getParent()), WORD_EL, TYPE_ATR, WordType.full.toString()); + //i.e. 
the stereochemistry is in a different word to what it is applied to + List words = OpsinTools.getChildElementsWithTagNameAndAttribute(possibleWordParent.getParent(), WORD_EL, TYPE_ATR, WordType.full.toString()); for (Element word : words) { - List possibleGroups = XOMTools.getDescendantElementsWithTagName(word, GROUP_EL); + List possibleGroups = OpsinTools.getDescendantElementsWithTagName(word, GROUP_EL); for (int i = possibleGroups.size()-1; i >=0; i--) { - Fragment correspondingFrag = state.xmlFragmentMap.get(possibleGroups.get(i)); - if (locant == null){//undefined locant - Set bondSet = correspondingFrag.getBondSet(); - for (Bond potentialBond : bondSet) { - if (notExplicitlyDefinedStereoBondMap.containsKey(potentialBond) && (!isCisTrans || cisTransUnambiguousOnBond(potentialBond))){ - applyStereoChemistryToStereoBond(potentialBond, notExplicitlyDefinedStereoBondMap.get(potentialBond), eOrZ); - notExplicitlyDefinedStereoBondMap.remove(potentialBond); - return; - } - } - List sortedInterFragmentBonds = sortInterFragmentBonds(state.fragManager.getInterFragmentBonds(correspondingFrag), correspondingFrag); - for (Bond potentialBond : sortedInterFragmentBonds) { - if (notExplicitlyDefinedStereoBondMap.containsKey(potentialBond) && (!isCisTrans || cisTransUnambiguousOnBond(potentialBond))){ - applyStereoChemistryToStereoBond(potentialBond, notExplicitlyDefinedStereoBondMap.get(potentialBond), eOrZ); - notExplicitlyDefinedStereoBondMap.remove(potentialBond); - return; - } - } - } - else{ - Atom firstAtomInBond = correspondingFrag.getAtomByLocant(locant); - if (firstAtomInBond !=null){ - List bonds = firstAtomInBond.getBonds(); - for (Bond potentialBond : bonds) { - if (notExplicitlyDefinedStereoBondMap.containsKey(potentialBond) && (!isCisTrans || cisTransUnambiguousOnBond(potentialBond))){ - applyStereoChemistryToStereoBond(potentialBond, notExplicitlyDefinedStereoBondMap.get(potentialBond), eOrZ); - notExplicitlyDefinedStereoBondMap.remove(potentialBond); - return; 
- } - } - } + if (attemptAssignmentOfStereoBondToFragment(possibleGroups.get(i).getFrag(), eOrZ, locant, isCisTrans)) { + return; } } } @@ -359,6 +329,41 @@ } + private boolean attemptAssignmentOfStereoBondToFragment(Fragment fragment, String eOrZ, String locant, boolean isCisTrans) throws StereochemistryException { + if (locant == null){//undefined locant + Set bondSet = fragment.getBondSet(); + for (Bond potentialBond : bondSet) { + if (notExplicitlyDefinedStereoBondMap.containsKey(potentialBond) && (!isCisTrans || cisTransUnambiguousOnBond(potentialBond))){ + applyStereoChemistryToStereoBond(potentialBond, notExplicitlyDefinedStereoBondMap.get(potentialBond), eOrZ); + notExplicitlyDefinedStereoBondMap.remove(potentialBond); + return true; + } + } + List sortedInterFragmentBonds = sortInterFragmentBonds(state.fragManager.getInterFragmentBonds(fragment), fragment); + for (Bond potentialBond : sortedInterFragmentBonds) { + if (notExplicitlyDefinedStereoBondMap.containsKey(potentialBond) && (!isCisTrans || cisTransUnambiguousOnBond(potentialBond))){ + applyStereoChemistryToStereoBond(potentialBond, notExplicitlyDefinedStereoBondMap.get(potentialBond), eOrZ); + notExplicitlyDefinedStereoBondMap.remove(potentialBond); + return true; + } + } + } + else{ + Atom firstAtomInBond = fragment.getAtomByLocant(locant); + if (firstAtomInBond !=null){ + List bonds = firstAtomInBond.getBonds(); + for (Bond potentialBond : bonds) { + if (notExplicitlyDefinedStereoBondMap.containsKey(potentialBond) && (!isCisTrans || cisTransUnambiguousOnBond(potentialBond))){ + applyStereoChemistryToStereoBond(potentialBond, notExplicitlyDefinedStereoBondMap.get(potentialBond), eOrZ); + notExplicitlyDefinedStereoBondMap.remove(potentialBond); + return true; + } + } + } + } + return false; + } + /** * Does the stereoBond have a hydrogen connected to both ends of it. * If not it is ambiguous when used in conjunction with cis/trans and E/Z should be used. 
@@ -369,7 +374,7 @@ List neighbours1 = potentialBond.getFromAtom().getAtomNeighbours(); boolean foundHydrogen1 =false; for (Atom neighbour : neighbours1) { - if (neighbour.getElement().equals("H")){ + if (neighbour.getElement() == ChemEl.H){ foundHydrogen1 =true; } } @@ -377,7 +382,7 @@ List neighbours2 = potentialBond.getToAtom().getAtomNeighbours(); boolean foundHydrogen2 =false; for (Atom neighbour : neighbours2) { - if (neighbour.getElement().equals("H")){ + if (neighbour.getElement() == ChemEl.H){ foundHydrogen2 =true; } } @@ -410,8 +415,9 @@ * @param bond The stereobond * @param stereoBond * @param eOrZ The stereo description given in the name + * @throws StereochemistryException */ - private void applyStereoChemistryToStereoBond(Bond bond, StereoBond stereoBond, String eOrZ ) { + private void applyStereoChemistryToStereoBond(Bond bond, StereoBond stereoBond, String eOrZ ) throws StereochemistryException { List stereoBondAtoms = stereoBond.getOrderedStereoAtoms(); //stereoBondAtoms contains the higher priority atom at one end, the two bond atoms and the higher priority atom at the other end Atom[] atomRefs4 = new Atom[4]; @@ -425,6 +431,9 @@ else if (eOrZ.equals("Z")){ bond.setBondStereoElement(atomRefs4, BondStereoValue.CIS); } + else if (eOrZ.equals("EZ")){ + bond.setBondStereo(null); + } else{ throw new IllegalArgumentException("Unexpected stereochemistry type: " + eOrZ); } @@ -438,61 +447,82 @@ * @throws StructureBuildingException */ private boolean assignCisTransOnRing(Element stereoChemistryEl) throws StructureBuildingException { - if (stereoChemistryEl.getAttribute(LOCANT_ATR)!=null){ + if (stereoChemistryEl.getAttribute(LOCANT_ATR) != null) { return false; } - Element parentSubBracketOrRoot = (Element) stereoChemistryEl.getParent(); - List possibleFragments = StructureBuildingMethods.findAlternativeFragments(state, parentSubBracketOrRoot); - List adjacentGroupEls = XOMTools.getDescendantElementsWithTagName(parentSubBracketOrRoot, GROUP_EL); + Element 
parentSubBracketOrRoot = stereoChemistryEl.getParent(); + List possibleFragments = StructureBuildingMethods.findAlternativeFragments(parentSubBracketOrRoot); + List adjacentGroupEls = OpsinTools.getDescendantElementsWithTagName(parentSubBracketOrRoot, GROUP_EL); for (int i = adjacentGroupEls.size()-1; i >=0; i--) { - possibleFragments.add(state.xmlFragmentMap.get(adjacentGroupEls.get(i))); + possibleFragments.add(adjacentGroupEls.get(i).getFrag()); } for (Fragment fragment : possibleFragments) { - List atomList = fragment.getAtomList(); - List stereoAtoms = new ArrayList(); - for (Atom potentialStereoAtom : atomList) { - if (potentialStereoAtom.getAtomIsInACycle()){ - List neighbours = potentialStereoAtom.getAtomNeighbours(); - if (neighbours.size()==4){ - int hydrogenCount =0; - int acylicOrNotInFrag =0; - for (Atom neighbour : neighbours) { - if (neighbour.getElement().equals("H")){ - hydrogenCount++; - } - if (!neighbour.getAtomIsInACycle() || !atomList.contains(neighbour)){ - acylicOrNotInFrag++; - } + if (attemptAssignmentOfCisTransRingStereoToFragment(fragment, stereoChemistryEl)){ + return true; + } + } + + Element possibleWordParent = parentSubBracketOrRoot.getParent(); + if (possibleWordParent.getName().equals(WORD_EL) && possibleWordParent.getChild(0).equals(parentSubBracketOrRoot)){ + //stereochemistry is in a different word to what it is applied to + List words = OpsinTools.getNextSiblingsOfType(possibleWordParent, WORD_EL); + for (Element word : words) { + List possibleGroups = OpsinTools.getDescendantElementsWithTagName(word, GROUP_EL); + for (int i = possibleGroups.size()-1; i >=0; i--) { + if (attemptAssignmentOfCisTransRingStereoToFragment(possibleGroups.get(i).getFrag(), stereoChemistryEl)) { + return true; + } + } + } + } + return false; + } + + + private boolean attemptAssignmentOfCisTransRingStereoToFragment(Fragment fragment, Element stereoChemistryEl) throws StructureBuildingException { + List atomList = fragment.getAtomList(); + List 
stereoAtoms = new ArrayList(); + for (Atom potentialStereoAtom : atomList) { + if (potentialStereoAtom.getAtomIsInACycle()){ + List neighbours = potentialStereoAtom.getAtomNeighbours(); + if (neighbours.size()==4){ + int hydrogenCount =0; + int acylicOrNotInFrag =0; + for (Atom neighbour : neighbours) { + if (neighbour.getElement() == ChemEl.H){ + hydrogenCount++; } - if (hydrogenCount==1 || (hydrogenCount==0 && acylicOrNotInFrag ==1) ){ - stereoAtoms.add(potentialStereoAtom); + if (!neighbour.getAtomIsInACycle() || !atomList.contains(neighbour)){ + acylicOrNotInFrag++; } } + if (hydrogenCount==1 || (hydrogenCount==0 && acylicOrNotInFrag ==1) ){ + stereoAtoms.add(potentialStereoAtom); + } } } - if (stereoAtoms.size()==2){ - Atom a1 = stereoAtoms.get(0); - Atom a2 = stereoAtoms.get(1); - - if (a1.getAtomParity()!=null && a2.getAtomParity()!=null){//one can have defined stereochemistry but not both - return false; - } - - Set peripheryBonds = determinePeripheryBonds(fragment); - List> paths = CycleDetector.getPathBetweenAtomsUsingBonds(a1, a2, peripheryBonds); - if (paths.size()!=2){ - return false; - } - applyStereoChemistryToCisTransOnRing(a1, a2, paths, atomList, stereoChemistryEl.getAttributeValue(VALUE_ATR)); - notExplicitlyDefinedStereoCentreMap.remove(stereoAtoms.get(0)); - notExplicitlyDefinedStereoCentreMap.remove(stereoAtoms.get(1)); - return true; + } + if (stereoAtoms.size()==2){ + Atom a1 = stereoAtoms.get(0); + Atom a2 = stereoAtoms.get(1); + + if (a1.getAtomParity()!=null && a2.getAtomParity()!=null){//one can have defined stereochemistry but not both + return false; } + + Set peripheryBonds = determinePeripheryBonds(fragment); + List> paths = CycleDetector.getPathBetweenAtomsUsingBonds(a1, a2, peripheryBonds); + if (paths.size()!=2){ + return false; + } + applyStereoChemistryToCisTransOnRing(a1, a2, paths, atomList, stereoChemistryEl.getAttributeValue(VALUE_ATR)); + notExplicitlyDefinedStereoCentreMap.remove(stereoAtoms.get(0)); + 
notExplicitlyDefinedStereoCentreMap.remove(stereoAtoms.get(1)); + return true; } return false; } - private Set determinePeripheryBonds(Fragment fragment) { List rings = SSSRFinder.getSetOfSmallestRings(fragment); FusedRingNumberer.setupAdjacentFusedRingProperties(rings); @@ -587,7 +617,7 @@ private Atom getHydrogenOrAcyclicOrOutsideOfFragment(List atoms, List fragmentAtoms) { for (Atom atom : atoms) { - if (atom.getElement().equals("H")){ + if (atom.getElement() == ChemEl.H){ return atom; } } @@ -602,30 +632,36 @@ /** * Handles assignment of alpha and beta stereochemistry to appropriate ring systems * Currently these are only assignable to natural products + * Xi (unknown) stereochemistry is applicable to any tetrahedral centre * @param stereoChemistryEl * @throws StructureBuildingException */ - private void assignAlphaBetaStereochem(Element stereoChemistryEl) throws StructureBuildingException { - Element parentSubBracketOrRoot = (Element) stereoChemistryEl.getParent(); - List possibleFragments = StructureBuildingMethods.findAlternativeFragments(state, parentSubBracketOrRoot); + private void assignAlphaBetaXiStereochem(Element stereoChemistryEl) throws StructureBuildingException { + Element parentSubBracketOrRoot = stereoChemistryEl.getParent(); + List possibleFragments = StructureBuildingMethods.findAlternativeFragments(parentSubBracketOrRoot); Fragment substituentGroup =null; - if (parentSubBracketOrRoot.getLocalName().equals(SUBSTITUENT_EL)){ - substituentGroup =state.xmlFragmentMap.get(parentSubBracketOrRoot.getFirstChildElement(GROUP_EL)); + if (parentSubBracketOrRoot.getName().equals(SUBSTITUENT_EL)){ + substituentGroup = parentSubBracketOrRoot.getFirstChildElement(GROUP_EL).getFrag(); } - List adjacentGroupEls = XOMTools.getDescendantElementsWithTagName(parentSubBracketOrRoot, GROUP_EL); + List adjacentGroupEls = OpsinTools.getDescendantElementsWithTagName(parentSubBracketOrRoot, GROUP_EL); for (int i = adjacentGroupEls.size()-1; i >=0; i--) { - 
possibleFragments.add(state.xmlFragmentMap.get(adjacentGroupEls.get(i))); + possibleFragments.add(adjacentGroupEls.get(i).getFrag()); } String locant = stereoChemistryEl.getAttributeValue(LOCANT_ATR); String alphaOrBeta = stereoChemistryEl.getAttributeValue(VALUE_ATR); for (Fragment fragment : possibleFragments) { Atom potentialStereoAtom = fragment.getAtomByLocant(locant); - if (potentialStereoAtom !=null && atomStereoCentreMap.containsKey(potentialStereoAtom)){//same stereocentre can defined twice e.g. one subsituent alpha the other beta - String alphaBetaClockWiseAtomOrdering = state.xmlFragmentMap.getElement(fragment).getAttributeValue(ALPHABETACLOCKWISEATOMORDERING_ATR); - if (alphaBetaClockWiseAtomOrdering==null){ - throw new StructureBuildingException("Identified fragment is not known to be able to support alpha/beta stereochemistry"); + if (potentialStereoAtom !=null && atomStereoCentreMap.containsKey(potentialStereoAtom)){//same stereocentre can be defined twice e.g. one subsituent alpha the other beta + if (alphaOrBeta.equals("xi")){ + potentialStereoAtom.setAtomParity(null); + } + else { + String alphaBetaClockWiseAtomOrdering = fragment.getTokenEl().getAttributeValue(ALPHABETACLOCKWISEATOMORDERING_ATR); + if (alphaBetaClockWiseAtomOrdering==null){ + throw new StructureBuildingException("Identified fragment is not known to be able to support alpha/beta stereochemistry"); + } + applyAlphaBetaStereochemistryToStereoCentre(potentialStereoAtom, fragment, alphaBetaClockWiseAtomOrdering, alphaOrBeta, substituentGroup); } - applyAlphaBetaStereochemistryToStereoCentre(potentialStereoAtom, fragment, alphaBetaClockWiseAtomOrdering, alphaOrBeta, substituentGroup); notExplicitlyDefinedStereoCentreMap.remove(potentialStereoAtom); return; } @@ -647,7 +683,7 @@ * @throws StructureBuildingException */ private void applyAlphaBetaStereochemistryToStereoCentre(Atom stereoAtom, Fragment fragment, String alphaBetaClockWiseAtomOrdering, String alphaOrBeta, Fragment 
substituentGroup) throws StructureBuildingException { - List ringOrder = StringTools.arrayToList(MATCH_SLASH.split(alphaBetaClockWiseAtomOrdering)); + List ringOrder = StringTools.arrayToList(alphaBetaClockWiseAtomOrdering.split("/")); int positionInList = ringOrder.indexOf(stereoAtom.getFirstLocant()); if (stereoAtom.getAtomIsInACycle() && positionInList!=-1){ Atom[] atomRefs4 = new Atom[4]; @@ -669,11 +705,11 @@ atomRefs4[1]=a2; atomRefs4[2]=a1; } - else if (a1.getElement().equals("H") && !a2.getElement().equals("H")){ + else if (a1.getElement() == ChemEl.H && a2.getElement() != ChemEl.H){ atomRefs4[1]=a2; atomRefs4[2]=a1; } - else if (a2.getElement().equals("H") && !a1.getElement().equals("H")){ + else if (a2.getElement() == ChemEl.H && a1.getElement() != ChemEl.H){ atomRefs4[1]=a1; atomRefs4[2]=a2; }//TODO support case where alpha/beta are applied prior to a suffix (and the stereocentre doesn't have a hydrogen) e.g. 17alpha-yl @@ -695,9 +731,6 @@ else if (alphaOrBeta.equals("beta")){ stereoAtom.setAtomParity(atomRefs4, -1); } - else if (alphaOrBeta.equals("xi")){ - stereoAtom.setAtomParity(null); - } else{ throw new StructureBuildingException("OPSIN Bug: malformed alpha/beta stereochemistry value"); } @@ -735,7 +768,7 @@ * @throws StructureBuildingException */ private void assignCarbohydratePrefixStereochem(Element carbohydrateGroup, List carbohydrateStereoChemistryEls) throws StructureBuildingException { - Fragment carbohydrate = state.xmlFragmentMap.get(carbohydrateGroup); + Fragment carbohydrate = carbohydrateGroup.getFrag(); Set atoms = notExplicitlyDefinedStereoCentreMap.keySet(); List stereocentresInCarbohydrate = new ArrayList(); for (Atom atom : atoms) { @@ -751,10 +784,8 @@ Collections.reverse(carbohydrateStereoChemistryEls); List stereocentreConfiguration = new ArrayList(); for (Element carbohydrateStereoChemistryEl: carbohydrateStereoChemistryEls) { - String[] values = MATCH_SLASH.split(carbohydrateStereoChemistryEl.getAttributeValue(VALUE_ATR)); - 
for (String value : values) { - stereocentreConfiguration.add(value); - } + String[] values = carbohydrateStereoChemistryEl.getAttributeValue(VALUE_ATR).split("/"); + Collections.addAll(stereocentreConfiguration, values); } if (stereocentresInCarbohydrate.size() != stereocentreConfiguration.size()){ @@ -787,34 +818,116 @@ notExplicitlyDefinedStereoCentreMap.remove(stereoAtom); } } + + private void assignDlStereochem(Element stereoChemistryEl) throws StructureBuildingException { + String dOrL = stereoChemistryEl.getAttributeValue(VALUE_ATR); + Element elementToApplyTo = OpsinTools.getNextSiblingIgnoringCertainElements(stereoChemistryEl, new String[]{STEREOCHEMISTRY_EL}); + if (elementToApplyTo != null + && elementToApplyTo.getName().equals(GROUP_EL) + && attemptAssignmentOfDlStereoToFragment(elementToApplyTo.getFrag(), dOrL)){ + // D/L adjacent to group that now has an appropriate stereocentre e.g. glycine + return; + } - static int swapsRequiredToSort(Atom[] atomRefs4){ - Atom[] atomRefs4Copy = atomRefs4.clone(); - int swapsPerformed = 0; - int i,j; - - for (i=atomRefs4Copy.length; --i >=0;) { - boolean swapped = false; - for (j=0; j atomRefs4Copy[j+1].getID()){ - Atom temp = atomRefs4Copy[j+1]; - atomRefs4Copy[j+1] = atomRefs4Copy[j]; - atomRefs4Copy[j] = temp; - swapsPerformed++; - swapped=true; + Element parentSubBracketOrRoot = stereoChemistryEl.getParent(); + //generally the LAST group in this list will be the appropriate group + //we use the same algorithm as for unlocanted substitution so as to deprecate assignment into brackets + List possibleFragments = StructureBuildingMethods.findAlternativeFragments(parentSubBracketOrRoot); + List adjacentGroupEls = OpsinTools.getDescendantElementsWithTagName(parentSubBracketOrRoot, GROUP_EL); + for (int i = adjacentGroupEls.size()-1; i >=0; i--) { + possibleFragments.add(adjacentGroupEls.get(i).getFrag()); + } + for (Fragment fragment : possibleFragments) { + if (attemptAssignmentOfDlStereoToFragment(fragment, dOrL)) { 
+ return; } } - if (!swapped){ - return swapsPerformed; + throw new StereochemistryException("Could not find stereocentre to apply " + dOrL.toUpperCase(Locale.ROOT) + " stereochemistry to"); + } + + + private boolean attemptAssignmentOfDlStereoToFragment(Fragment fragment, String dOrL) throws StereochemistryException, StructureBuildingException { + List atomList = fragment.getAtomList(); + for (Atom potentialStereoAtom : atomList) { + if (notExplicitlyDefinedStereoCentreMap.containsKey(potentialStereoAtom) && potentialStereoAtom.getBondCount() == 4) { + List neighbours = potentialStereoAtom.getAtomNeighbours(); + Atom acidGroup = null;//A carbon connected to non-carbons e.g. COOH + Atom amineOrAlcohol = null;//N or O e.g. NH2 (as this may be substituted don't check H count) + Atom sideChain = null;//A carbon + Atom hydrogen = null;//A hydrogen + for (Atom atom : neighbours) { + ChemEl el = atom.getElement(); + if (el == ChemEl.H) { + hydrogen = atom; + } + else if (el == ChemEl.C) { + int chalcogenNeighbours = 0; + for (Atom neighbour2 : atom.getAtomNeighbours()) { + if (atom == neighbour2) { + continue; + } + if (neighbour2.getElement().isChalcogen()) { + chalcogenNeighbours++; + } + } + if (chalcogenNeighbours > 0) { + acidGroup = atom; + } + else { + sideChain = atom; + } + } + else if (el == ChemEl.O || el ==ChemEl.N) { + amineOrAlcohol = atom; + } + } + if (acidGroup != null && amineOrAlcohol != null && sideChain != null && hydrogen != null) { + Atom[] atomRefs4 = new Atom[]{acidGroup, sideChain, amineOrAlcohol, hydrogen}; + if (dOrL.equals("l") || dOrL.equals("ls")) { + potentialStereoAtom.setAtomParity(atomRefs4, -1); + } else if (dOrL.equals("d") || dOrL.equals("ds")) { + potentialStereoAtom.setAtomParity(atomRefs4, 1); + } else if (dOrL.equals("dl")) { + //racemic + potentialStereoAtom.setAtomParity(null); + } else{ + throw new RuntimeException("OPSIN bug: Unexpected value for D/L stereochemistry found: " + dOrL ); + } + 
notExplicitlyDefinedStereoCentreMap.remove(potentialStereoAtom); + return true; + } + } + } + return false; + } + + static int swapsRequiredToSort(Atom[] atomRefs4){ + Atom[] atomRefs4Copy = atomRefs4.clone(); + int swapsPerformed = 0; + int i,j; + + for (i=atomRefs4Copy.length; --i >=0;) { + boolean swapped = false; + for (j=0; j atomRefs4Copy[j+1].getID()){ + Atom temp = atomRefs4Copy[j+1]; + atomRefs4Copy[j+1] = atomRefs4Copy[j]; + atomRefs4Copy[j] = temp; + swapsPerformed++; + swapped = true; + } + } + if (!swapped){ + return swapsPerformed; + } } - } - return swapsPerformed; + return swapsPerformed; } static boolean checkEquivalencyOfAtomsRefs4AndParity(Atom[] atomRefs1, int atomParity1, Atom[] atomRefs2, int atomParity2){ - int swaps1 =swapsRequiredToSort(atomRefs1); - int swaps2 =swapsRequiredToSort(atomRefs2); - if (atomParity1<0 && atomParity2>0 || atomParity1>0 && atomParity2<0){ + int swaps1 = swapsRequiredToSort(atomRefs1); + int swaps2 = swapsRequiredToSort(atomRefs2); + if (atomParity1 < 0 && atomParity2 > 0 || atomParity1 > 0 && atomParity2 < 0){ swaps1++; } return swaps1 %2 == swaps2 %2; diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StreamSerializer.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StreamSerializer.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StreamSerializer.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StreamSerializer.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,35 +0,0 @@ -package uk.ac.cam.ch.wwmm.opsin; - -import java.io.IOException; -import java.io.OutputStream; - -import nu.xom.Element; -import nu.xom.Serializer; - -public class StreamSerializer extends Serializer { - - public StreamSerializer(OutputStream out) { - super(out); - } - - @Override - public void write(Element element) throws IOException { - super.write(element); - } - - @Override - public void writeXMLDeclaration() throws IOException { 
- super.writeXMLDeclaration(); - } - - @Override - public void writeEndTag(Element element) throws IOException { - super.writeEndTag(element); - } - - @Override - public void writeStartTag(Element element) throws IOException { - super.writeStartTag(element); - } -} - diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StringTools.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StringTools.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StringTools.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StringTools.java 2017-07-23 20:55:18.000000000 +0000 @@ -3,56 +3,33 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; /**Static routines for string manipulation. - * This is a specially tailored version of StringTools as found in OSCAR for use in OPSIN * * @author ptc24 * @author dl387 * */ -public final class StringTools { - - /**Converts a list of characters into a string. - * - * @param l A list of characters. - * @return The corresponding string. - */ - public static String charListToString(List l) { - StringBuffer sb = new StringBuffer(); - for(char c : l) { - sb.append(c); - } - return sb.toString(); - } +class StringTools { /** * Converts a list of strings into a single string delimited by the given separator * - * @param l A list of strings. + * @param list A list of strings. + * @param separator * @return The corresponding string. */ - public static String stringListToString(List l, String separator) { - StringBuffer sb = new StringBuffer(); - for(int i=0;i list, String separator) { + StringBuilder sb = new StringBuilder(); + int lastIndexOfList = list.size() - 1; + for (int i = 0; i < lastIndexOfList; i++) { + sb.append(list.get(i)); + sb.append(separator); } - return sb.toString(); - } - - /**Converts a string to a list of characters. - * - * @param s A string. 
- * @return The corresponding list of characters. - */ - public static List stringToList(String s) { - List cl = new ArrayList(); - for(int i=0;i= 0){ + sb.append(list.get(lastIndexOfList)); } - return cl; + return sb.toString(); } /**Produce repetitions of a string. Eg. HelloWorld * 2 = HelloWorldHelloWorld. @@ -61,9 +38,9 @@ * @param n The number of times to multiply it. * @return The multiplied string. */ - public static String multiplyString(String s, int n) { - StringBuffer sb = new StringBuffer(); - for(int i=n-1;i>=0;i--) { + static String multiplyString(String s, int n) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < n; i++) { sb.append(s); } return sb.toString(); @@ -75,13 +52,16 @@ * @param separator The separator to use. * @return The resulting string. */ - public static String arrayToString(String [] stringArray, String separator) { - StringBuffer sb = new StringBuffer(); - for(int i=0;i= 0){ + sb.append(stringArray[lastIndexOfArray]); + } return sb.toString(); } @@ -93,22 +73,32 @@ * @return The converted string * @throws PreProcessingException */ - public static String convertNonAsciiAndNormaliseRepresentation(String s) throws PreProcessingException { - s = MATCH_WHITESPACE.matcher(s).replaceAll(" ");//normalise white space - StringBuilder sb = new StringBuilder(); + static String convertNonAsciiAndNormaliseRepresentation(String s) throws PreProcessingException { + StringBuilder sb = new StringBuilder(s.length()); for (int i = 0, l = s.length(); i < l; i++) { char c = s.charAt(i); - if(c >= 128) { - sb.append(getReplacementForNonASCIIChar(c));//replace non ascii characters with hard coded ascii strings - } - else if (c == 96){ + switch (c) { + case '\t': + case '\n': + case '\u000B'://vertical tab + case '\f': + case '\r': + //normalise white space + sb.append(" "); + break; + case '`': sb.append("'");//replace back ticks with apostrophe - } - else if (c == 34){ + break; + case '"': sb.append("''");//replace quotation mark with two 
primes - } - else if (c > 31){//ignore control characters - sb.append(c); + break; + default: + if(c >= 128) { + sb.append(getReplacementForNonASCIIChar(c));//replace non ascii characters with hard coded ascii strings + } + else if (c > 31){//ignore control characters + sb.append(c); + } } } return sb.toString(); @@ -148,9 +138,9 @@ case '\u00B1': return "+-";//plus minus symbol case '\u2213': return "-+"; - case '\u2192': return "->";//right arrows - case '\u2794': return "->"; - case '\u2799': return "->"; + case '\u2192'://right arrows + case '\u2794': + case '\u2799': case '\u279C': return "->"; case '\u00C6': return "AE";//common ligatures @@ -223,7 +213,8 @@ case '\u0117': return "e"; case '\u0116': return "E"; - case '\u00B9': return "1";//superscripts + case '\u2070': return "0";//superscripts + case '\u00B9': return "1"; case '\u00B2': return "2"; case '\u00B3': return "3"; case '\u2074': return "4"; @@ -232,17 +223,30 @@ case '\u2077': return "7"; case '\u2078': return "8"; case '\u2079': return "9"; - case '\u2070': return "0"; + + case '\u2080': return "0";//subscripts + case '\u2081': return "1"; + case '\u2082': return "2"; + case '\u2083': return "3"; + case '\u2084': return "4"; + case '\u2085': return "5"; + case '\u2086': return "6"; + case '\u2087': return "7"; + case '\u2088': return "8"; + case '\u2089': return "9"; case '\u2018': return "'";//quotation marks and primes (map to apostrophe/s) case '\u2019': return "'"; case '\u201B': return "'"; + case '\u02BC': return "'"; case '\u201C': return "''"; case '\u201D': return "''"; case '\u2032': return "'";//primes case '\u2033': return "''"; case '\u2034': return "'''"; case '\u2057': return "''''"; + case '\u02B9': return "'";//modifier primes + case '\u02BA': return "''"; case '\u2035': return "'";//back primes case '\u2036': return "''"; case '\u2037': return "'''"; @@ -252,14 +256,18 @@ case '\u02DD': return "''"; case '\u030B': return "''"; - case '\u2010': return "-";//dashes, hyphens and 
the minus sign - case '\u2011': return "-"; - case '\u2012': return "-"; - case '\u2013': return "-"; - case '\u2014': return "-"; - case '\u2015': return "-"; + case '\u2010'://dashes, hyphens and the minus sign + case '\u2011': + case '\u2012': + case '\u2013': + case '\u2014': + case '\u2015': case '\u2212': return "-"; + case '\u02DC'://small tilde + case '\u223C'://tilde operator + case '\u301C': return "~";//wave dash + case '\uff0c': return ",";//full width punctuation case '\uFF1A': return ":"; case '\uFF1B': return ";"; @@ -274,15 +282,25 @@ case '\u00DF': return "beta";//similar glyph - case '\u00A0': return " ";//Non-breaking spaces - case '\u2007': return " "; - case '\u202F': return " "; - case '\u3000': return " ";//ideographics space - - case '\u00AD': return "";//soft hyphen - case '\u200b': return "";//zero width space - case '\u200d': return "";//zero width joiner - + case '\u2000'://different sized spaces + case '\u2001': + case '\u2002': + case '\u2003': + case '\u2004': + case '\u2005': + case '\u2006': + case '\u2008': + case '\u2009': + case '\u200A': + case '\u205F': + case '\u00A0'://Non-breaking spaces + case '\u2007': + case '\u202F': + case '\u3000': return " ";//ideographic space + + case '\u00AD'://soft hyphen + case '\u200b'://zero width space + case '\u200d'://zero width joiner case '\uFEFF': return "";//BOM-found at the start of some UTF files default: throw new PreProcessingException("Unrecognised unicode character: " + c); @@ -294,7 +312,7 @@ * @param array The array. * @return The ArrayList. 
*/ - public static List arrayToList(String [] array) { + static List arrayToList(String [] array) { List list = new ArrayList(); list.addAll(Arrays.asList(array)); return list; @@ -305,9 +323,9 @@ * @param locantText * @return */ - public static String removeDashIfPresent(String locantText){ + static String removeDashIfPresent(String locantText){ if(locantText.endsWith("-")) { - locantText = locantText.substring(0, locantText.length()-1); + locantText = locantText.substring(0, locantText.length() - 1); } return locantText; } @@ -317,10 +335,10 @@ * @param locantText * @return */ - public static int countTerminalPrimes(String locantText){ + static int countTerminalPrimes(String locantText){ int numberOfPrimes = 0; - for(int k = locantText.length() -1; k>0; k--){ - if (locantText.charAt(k)=='\''){ + for(int i = locantText.length() -1; i > 0; i--){ + if (locantText.charAt(i) == '\''){ numberOfPrimes++; } else{ @@ -331,16 +349,42 @@ } /** + * Tests if this string start with the specified prefix ignoring case. + * @param str + * @param prefix + * @return + */ + static boolean startsWithCaseInsensitive(String str, String prefix) { + return str.regionMatches(true, 0, prefix, 0, prefix.length()); + } + + /** * Tests if this string ends with the specified suffix ignoring case. 
* @param str * @param suffix * @return */ - public static boolean endsWithCaseInsensitive(String str, String suffix) { + static boolean endsWithCaseInsensitive(String str, String suffix) { if (suffix.length() > str.length()) { return false; } int strOffset = str.length() - suffix.length(); return str.regionMatches(true, strOffset, suffix, 0, suffix.length()); } + + /** + * Lower cases a string (only converts A-Z to a-z) + * @param str + */ + static String lowerCaseAsciiString(String str) { + StringBuilder sb = new StringBuilder(str.length()); + for (int i = 0, l = str.length(); i < l; i++) { + char c = str.charAt(i); + if (c >= 'A' && c <= 'Z') { + c = (char) (c + 32); + } + sb.append(c); + } + return sb.toString(); + } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuilder.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuilder.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuilder.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuilder.java 2017-07-23 20:55:18.000000000 +0000 @@ -8,14 +8,10 @@ import java.util.ListIterator; import java.util.Map; import java.util.Set; -import java.util.Stack; +import java.util.regex.Pattern; import uk.ac.cam.ch.wwmm.opsin.StereoAnalyser.StereoBond; import uk.ac.cam.ch.wwmm.opsin.StereoAnalyser.StereoCentre; - - -import nu.xom.Element; -import nu.xom.Elements; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; import static uk.ac.cam.ch.wwmm.opsin.StructureBuildingMethods.*; @@ -27,147 +23,168 @@ * */ class StructureBuilder { + private final BuildState state; + private final List polymerAttachmentPoints = new ArrayList();//rGroups need to be represented as normal atoms for the purpose of working out stereochemistry. 
They will be converted to a suitable representation later + + private int currentTopLevelWordRuleCount; + + StructureBuilder(BuildState state) { + this.state = state; + } + /** Builds a molecule as a Fragment based on ComponentProcessor output. - * @param state * @param molecule The ComponentProcessor output. * @return A single Fragment - the built molecule. * @throws StructureBuildingException If the molecule won't build - there may be many reasons. */ - Fragment buildFragment(BuildState state, Element molecule) throws StructureBuildingException { - Elements wordRules = molecule.getChildElements(WORDRULE_EL); - if (wordRules.size()==0){ - throw new StructureBuildingException("Molecule contains no words!?"); - } - Stack wordRuleStack = new Stack(); - for (int i = wordRules.size() -1; i >=0; i--) { - wordRuleStack.add(wordRules.get(i)); + Fragment buildFragment(Element molecule) throws StructureBuildingException { + List wordRules = molecule.getChildElements(WORDRULE_EL); + + currentTopLevelWordRuleCount = wordRules.size(); + if (currentTopLevelWordRuleCount == 0) { + throw new StructureBuildingException("Molecule contains no word rules!?"); } - List rGroups = new ArrayList();//rGroups need to represented as normal atoms for the purpose of working out stereochemistry. 
They will be converted to a suitable representation later - List wordRulesVisited = new ArrayList(); - while (wordRuleStack.size()>0) { - Element nextWordRuleEl = wordRuleStack.peek();//just has a look what's next - if(!wordRulesVisited.contains(nextWordRuleEl)){ - wordRulesVisited.add(nextWordRuleEl); - Elements wordRuleChildren = nextWordRuleEl.getChildElements(WORDRULE_EL); - if (wordRuleChildren.size()!=0){//nested word rules - for (int i = wordRuleChildren.size() -1; i >=0; i--) { - wordRuleStack.add(wordRuleChildren.get(i)); - } - continue; - } - } - Element currentWordRuleEl = wordRuleStack.pop(); - WordRule wordRule = WordRule.valueOf(currentWordRuleEl.getAttributeValue(WORDRULE_ATR)); - List words = XOMTools.getChildElementsWithTagNames(currentWordRuleEl, new String[]{WORD_EL, WORDRULE_EL}); - state.currentWordRule =wordRule; - if(wordRule == WordRule.simple) { - for (Element word : words) { - if (!word.getLocalName().equals(WORD_EL) || !word.getAttributeValue(TYPE_ATR).equals(WordType.full.toString())){ - throw new StructureBuildingException("OPSIN bug: Unexpected contents of 'simple' wordRule"); - } - resolveWordOrBracket(state, word); - } - } - else if(wordRule == WordRule.substituent) { - for (Element word : words) { - if (!word.getLocalName().equals(WORD_EL) || !word.getAttributeValue(TYPE_ATR).equals(WordType.substituent.toString()) || !state.n2sConfig.isAllowRadicals()){ - throw new StructureBuildingException("OPSIN bug: Unexpected contents of 'substituent' wordRule"); - } - resolveWordOrBracket(state, word); - } - } - else if(wordRule == WordRule.ester || wordRule == WordRule.multiEster) { - buildEster(state, words);//e.g. 
ethyl ethanoate, dimethyl terephthalate, methyl propanamide - } - else if (wordRule == WordRule.divalentFunctionalGroup){ - buildDiValentFunctionalGroup(state, words);// diethyl ether or methyl propyl ketone - } - else if (wordRule == WordRule.monovalentFunctionalGroup){ - buildMonovalentFunctionalGroup(state, words);// ethyl chloride, isophthaloyl dichloride, diethyl ether, ethyl alcohol - } - else if(wordRule == WordRule.functionalClassEster) { - buildFunctionalClassEster(state, words);//e.g. ethanoic acid ethyl ester, tetrathioterephthalic acid dimethyl ester - } - else if (wordRule == WordRule.acidReplacingFunctionalGroup){ - //e.g. ethanoic acid ethyl amide, terephthalic acid dimethyl amide, - //ethanoic acid amide, carbonic dihydrazide - //already processed by the ComponentProcessor - for (Element word : words) { - resolveWordOrBracket(state, word); - } - } - else if(wordRule == WordRule.oxide) { - buildOxide(state, words);//e.g. styrene oxide, triphenylphosphane oxide, thianthrene 5,5-dioxide, propan-2-one oxide - } - else if(wordRule == WordRule.carbonylDerivative) { - buildCarbonylDerivative(state, words);//e.g. Imidazole-2-carboxamide O-ethyloxime, pentan-3-one oxime - } - else if(wordRule == WordRule.anhydride) {//e.g. acetic anhydride - buildAnhydride(state, words); - } - else if(wordRule == WordRule.acidHalideOrPseudoHalide) {//e.g. phosphinimidic chloride - buildAcidHalideOrPseudoHalide(state, words); - } - else if(wordRule == WordRule.additionCompound) {//e.g. carbon tetrachloride - buildAdditionCompound(state, words); - } - else if (wordRule == WordRule.glycol){ - buildGlycol(state, words);//e.g. ethylene glycol - } - else if (wordRule == WordRule.glycolEther){ - buildGlycolEther(state, words);//e.g. octaethyleneglycol monododecyl ether - } - else if(wordRule == WordRule.acetal) { - buildAcetal(state, words);//e.g. 
propanal diethyl acetal - } - else if(wordRule == WordRule.potentialBiochemicalEster) { - //will be processed as two "simple" wordrules if no hydroxy found - buildBiochemicalEster(state, words, wordRules.size());//e.g. uridine 5'-(tetrahydrogen triphosphate) - } - else if(wordRule == WordRule.cyclicPeptide) { - buildCyclicPeptide(state, words); - } - else if(wordRule == WordRule.polymer) { - rGroups.addAll(buildPolymer(state, words)); - } - else{ - throw new StructureBuildingException("Unknown Word Rule"); - } + for (Element wordRule : wordRules) { + processWordRuleChildrenThenRule(wordRule); } - List groupElements = XOMTools.getDescendantElementsWithTagName(molecule, GROUP_EL); - processOxidoSpecialCase(state, groupElements); - processOxidationNumbers(state, groupElements); + if (currentTopLevelWordRuleCount != wordRules.size()) { + wordRules = molecule.getChildElements(WORDRULE_EL);//very rarely a word rule adds a top level word rule + } + + List groupElements = OpsinTools.getDescendantElementsWithTagName(molecule, GROUP_EL); + processOxidoAndMethionineSpecialCases(groupElements); + processOxidationNumbers(groupElements); state.fragManager.convertSpareValenciesToDoubleBonds(); state.fragManager.checkValencies(); - boolean explicitStoichiometryPresent = applyExplicitStoichiometryIfProvided(state, wordRules); - int overallCharge = state.fragManager.getOverallCharge(); - if (overallCharge!=0 && wordRules.size() >1){//a net charge is present! 
Could just mean the counterion has not been specified though - balanceChargeIfPossible(state, molecule, overallCharge, explicitStoichiometryPresent); - } - makeHydrogensExplicit(state); + manipulateStoichiometry(molecule, wordRules); + + state.fragManager.makeHydrogensExplicit(); Fragment uniFrag = state.fragManager.getUnifiedFragment(); - processStereochemistry(state, molecule, uniFrag); + processStereochemistry(molecule, uniFrag); - if (uniFrag.getOutAtomCount()>0 && !state.n2sConfig.isAllowRadicals()){ - throw new StructureBuildingException("Radicals are currently set to not convert to structures"); - } - if (state.n2sConfig.isOutputRadicalsAsWildCardAtoms()) { - rGroups.addAll(convertOutAtomsToRgroups(state, uniFrag)); + if (uniFrag.getOutAtomCount() > 0) { + if (!state.n2sConfig.isAllowRadicals()) { + throw new StructureBuildingException("Radicals are currently set to not convert to structures"); + } + if (state.n2sConfig.isOutputRadicalsAsWildCardAtoms()) { + convertOutAtomsToAttachmentAtoms(uniFrag); + } } - for (Fragment rGroup : rGroups) { - Atom rAtom = rGroup.getFirstAtom(); - rAtom.setElement("R"); + if (polymerAttachmentPoints.size() > 0) { + for (Atom rAtom : polymerAttachmentPoints) { + rAtom.setElement(ChemEl.R); + } + uniFrag.setPolymerAttachmentPoints(polymerAttachmentPoints); } - return uniFrag; } - private void buildEster(BuildState state, List words) throws StructureBuildingException { + + private void processWordRuleChildrenThenRule(Element wordRule) throws StructureBuildingException { + List wordRuleChildren = wordRule.getChildElements(WORDRULE_EL); + for (Element wordRuleChild : wordRuleChildren) { + processWordRuleChildrenThenRule(wordRuleChild); + } + processWordRule(wordRule); + } + + private void processWordRule(Element wordRuleEl) throws StructureBuildingException { + WordRule wordRule = WordRule.valueOf(wordRuleEl.getAttributeValue(WORDRULE_ATR)); + List words = OpsinTools.getChildElementsWithTagNames(wordRuleEl, new String[]{WORD_EL, 
WORDRULE_EL}); + state.currentWordRule = wordRule; + switch (wordRule) { + case simple: + for (Element word : words) { + if (!word.getName().equals(WORD_EL) || !word.getAttributeValue(TYPE_ATR).equals(WordType.full.toString())){ + throw new StructureBuildingException("OPSIN bug: Unexpected contents of 'simple' wordRule"); + } + resolveWordOrBracket(state, word); + } + break; + case substituent: + for (Element word : words) { + if (!word.getName().equals(WORD_EL) || !word.getAttributeValue(TYPE_ATR).equals(WordType.substituent.toString()) || !state.n2sConfig.isAllowRadicals()){ + throw new StructureBuildingException("OPSIN bug: Unexpected contents of 'substituent' wordRule"); + } + resolveWordOrBracket(state, word); + } + break; + case ester: + case multiEster: + buildEster(words);//e.g. ethyl ethanoate, dimethyl terephthalate, methyl propanamide + break; + case divalentFunctionalGroup: + buildDiValentFunctionalGroup(words);// diethyl ether or methyl propyl ketone + break; + case monovalentFunctionalGroup: + buildMonovalentFunctionalGroup(words);// ethyl chloride, isophthaloyl dichloride, diethyl ether, ethyl alcohol + break; + case functionalClassEster: + buildFunctionalClassEster(words);//e.g. ethanoic acid ethyl ester, tetrathioterephthalic acid dimethyl ester + break; + case acidReplacingFunctionalGroup: + //e.g. ethanoic acid ethyl amide, terephthalic acid dimethyl amide, + //ethanoic acid amide, carbonic dihydrazide + //already processed by the ComponentProcessor + for (Element word : words) { + resolveWordOrBracket(state, word); + } + break; + case oxide: + buildOxide(words);//e.g. styrene oxide, triphenylphosphane oxide, thianthrene 5,5-dioxide, propan-2-one oxide + break; + case carbonylDerivative: + buildCarbonylDerivative(words);//e.g. Imidazole-2-carboxamide O-ethyloxime, pentan-3-one oxime + break; + case anhydride: + buildAnhydride(words);//e.g. acetic anhydride + break; + case acidHalideOrPseudoHalide: + buildAcidHalideOrPseudoHalide(words);//e.g. 
phosphinimidic chloride + break; + case additionCompound: + buildAdditionCompound(words);//e.g. carbon tetrachloride + break; + case glycol: + buildGlycol(words);//e.g. ethylene glycol + break; + case glycolEther: + buildGlycolEther(words);//e.g. octaethyleneglycol monododecyl ether + break; + case acetal: + buildAcetal(words);//e.g. propanal diethyl acetal + break; + case potentialAlcoholEster: + //e.g. uridine 5'-(tetrahydrogen triphosphate) + if (!buildAlcoholEster(words, currentTopLevelWordRuleCount)){ + //should be processed as two "simple" wordrules if no hydroxy found, hence number of top level word rules may change + //These simple word rules have already been processed + splitAlcoholEsterRuleIntoTwoSimpleWordRules(words); + currentTopLevelWordRuleCount++; + } + break; + case cyclicPeptide: + buildCyclicPeptide(words); + break; + case amineDiConjunctiveSuffix: + //e.g. glycine N,N-diacetic acid + buildAmineDiConjunctiveSuffix(words); + break; + case polymer: + buildPolymer(words); + break; + default: + throw new StructureBuildingException("Unexpected Word Rule"); + } + } + + + private void buildEster(List words) throws StructureBuildingException { boolean inSubstituents = true; BuildResults substituentsBr = new BuildResults(); List ateGroups = new ArrayList(); @@ -175,7 +192,7 @@ for (Element word : words) { resolveWordOrBracket(state, word); - BuildResults br = new BuildResults(state, word); + BuildResults br = new BuildResults(word); if (inSubstituents && br.getFunctionalAtomCount() > 0){ inSubstituents = false; } @@ -198,7 +215,7 @@ } } if (traditionalEster){//e.g. 
ethylidene dipropanoate - br = new BuildResults(state, word); + br = new BuildResults(word); outAtomCount = br.getOutAtomCount(); } if (outAtomCount ==1){//TODO add support for locanted terepthaloyl @@ -236,6 +253,10 @@ if (outAtomCount > esterIdCount){ throw new StructureBuildingException("There are more radicals in the substituents(" + outAtomCount +") than there are places to form esters("+esterIdCount+")"); } + if (esterIdCount > outAtomCount && outAtomCount % ateGroups.size() !=0) { + //actually checks if the same number of ester forming points would be used in each ate group e.g. ethyl diacetate is wrong + throw new StructureBuildingException("There are less radicals in the substituents(" + outAtomCount +") than there are places to form esters("+esterIdCount+")"); + } for(int i=0; i< outAtomCount; i++) { BuildResults ateBr = ateGroups.get(i % ateGroups.size()); Atom ateAtom; @@ -248,7 +269,7 @@ } String locant = buildResultsToLocant.get(ateBr); if (locant ==null){//typical case - Atom atomOnSubstituentToUse =substituentsBr.getOutAtomTakingIntoAccountWhetherSetExplicitly(0); + Atom atomOnSubstituentToUse = getOutAtomTakingIntoAccountWhetherSetExplicitly(substituentsBr, 0); state.fragManager.createBond(ateAtom, atomOnSubstituentToUse, 1); substituentsBr.removeOutAtom(0); } @@ -273,13 +294,13 @@ - private void buildDiValentFunctionalGroup(BuildState state, List words) throws StructureBuildingException { + private void buildDiValentFunctionalGroup(List words) throws StructureBuildingException { int wordIndice = 0; if (!words.get(wordIndice).getAttributeValue(TYPE_ATR).equals(WordType.substituent.toString())) { throw new StructureBuildingException("word: " +wordIndice +" was expected to be a substituent"); } resolveWordOrBracket(state, words.get(wordIndice)); - BuildResults substituent1 =new BuildResults(state, words.get(wordIndice)); + BuildResults substituent1 =new BuildResults(words.get(wordIndice)); if (substituent1.getOutAtom(0).getValency() !=1){ throw new 
StructureBuildingException("OutAtom has unexpected valency. Expected 1. Actual: " + substituent1.getOutAtom(0).getValency()); } @@ -297,13 +318,13 @@ wordIndice++; if (words.get(wordIndice).getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())) {//e.g. methyl sulfoxide rather than dimethyl sulfoxide Element clone = state.fragManager.cloneElement(state, words.get(0)); - XOMTools.insertAfter(words.get(0), clone); - words = OpsinTools.elementsToElementArrayList(((Element)words.get(0).getParent()).getChildElements()); + OpsinTools.insertAfter(words.get(0), clone); + words = words.get(0).getParent().getChildElements(); } else{ resolveWordOrBracket(state, words.get(wordIndice)); } - substituent2 =new BuildResults(state, words.get(wordIndice)); + substituent2 =new BuildResults(words.get(wordIndice)); if (substituent2.getOutAtomCount()!=1){ throw new StructureBuildingException("Expected one outAtom. Found " + substituent2.getOutAtomCount() ); } @@ -315,16 +336,16 @@ if (words.get(wordIndice) ==null || !words.get(wordIndice).getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())) { throw new StructureBuildingException(words.get(wordIndice).getValue()+" was expected to be a functionalTerm"); } - List functionalGroup = XOMTools.getDescendantElementsWithTagName(words.get(wordIndice), FUNCTIONALGROUP_EL); + List functionalGroup = OpsinTools.getDescendantElementsWithTagName(words.get(wordIndice), FUNCTIONALGROUP_EL); if (functionalGroup.size()!=1){ throw new StructureBuildingException("Unexpected number of functionalGroups found, could be a bug in OPSIN's grammar"); } String smilesOfGroup = functionalGroup.get(0).getAttributeValue(VALUE_ATR); Fragment diValentGroup =state.fragManager.buildSMILES(smilesOfGroup, FUNCTIONALCLASS_TYPE_VAL, NONE_LABELS_VAL); - Atom outAtom1 =substituent1.getOutAtomTakingIntoAccountWhetherSetExplicitly(0); + Atom outAtom1 = getOutAtomTakingIntoAccountWhetherSetExplicitly(substituent1, 0); substituent1.removeOutAtom(0); 
- Atom outAtom2 = substituent2.getOutAtomTakingIntoAccountWhetherSetExplicitly(0); + Atom outAtom2 = getOutAtomTakingIntoAccountWhetherSetExplicitly(substituent2, 0); substituent2.removeOutAtom(0); if (diValentGroup.getOutAtomCount()==1){//c.f. peroxide where it is a linker state.fragManager.createBond(outAtom1, diValentGroup.getOutAtom(0).getAtom(), 1); @@ -343,11 +364,11 @@ state.fragManager.incorporateFragment(diValentGroup, outAtom1.getFrag()); } - private void buildMonovalentFunctionalGroup(BuildState state, List words) throws StructureBuildingException { + private void buildMonovalentFunctionalGroup(List words) throws StructureBuildingException { resolveWordOrBracket(state, words.get(0)); - List groups = XOMTools.getDescendantElementsWithTagName(words.get(0), GROUP_EL); + List groups = OpsinTools.getDescendantElementsWithTagName(words.get(0), GROUP_EL); for (Element group : groups) {//replaces outAtoms with valency greater than 1 with multiple outAtoms; e.g. ylidene -->diyl - Fragment frag = state.xmlFragmentMap.get(group); + Fragment frag = group.getFrag(); for (int i = frag.getOutAtomCount()-1; i>=0; i--) { OutAtom outAtom =frag.getOutAtom(i); if (outAtom.getValency()>1){ @@ -355,22 +376,22 @@ } } } - BuildResults substituentBR = new BuildResults(state, words.get(0)); + BuildResults substituentBR = new BuildResults(words.get(0)); List functionalGroupFragments = new ArrayList(); for (int i=1; i functionalGroups = XOMTools.getDescendantElementsWithTagName(functionalGroupWord, FUNCTIONALGROUP_EL); + List functionalGroups = OpsinTools.getDescendantElementsWithTagName(functionalGroupWord, FUNCTIONALGROUP_EL); if (functionalGroups.size()!=1){ throw new StructureBuildingException("Expected exactly 1 functionalGroup. 
Found " + functionalGroups.size()); } Fragment monoValentFunctionGroup =state.fragManager.buildSMILES(functionalGroups.get(0).getAttributeValue(VALUE_ATR), FUNCTIONALCLASS_TYPE_VAL, NONE_LABELS_VAL); if (functionalGroups.get(0).getAttributeValue(TYPE_ATR).equals(MONOVALENTSTANDALONEGROUP_TYPE_VAL)){ - Atom ideAtom = monoValentFunctionGroup.getDefaultInAtom(); + Atom ideAtom = monoValentFunctionGroup.getDefaultInAtomOrFirstAtom(); ideAtom.addChargeAndProtons(1, 1);//e.g. make cyanide charge netural } - Element possibleMultiplier = (Element) XOMTools.getPreviousSibling(functionalGroups.get(0)); + Element possibleMultiplier = OpsinTools.getPreviousSibling(functionalGroups.get(0)); functionalGroupFragments.add(monoValentFunctionGroup); if (possibleMultiplier!=null){ int multiplierValue = Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR)); @@ -396,26 +417,27 @@ } for (int i = 0; i < outAtomCount; i++) { Fragment ideFrag =functionalGroupFragments.get(i); - Atom ideAtom = ideFrag.getDefaultInAtom(); - Atom subAtom=substituentBR.getOutAtomTakingIntoAccountWhetherSetExplicitly(0); + Atom ideAtom = ideFrag.getDefaultInAtomOrFirstAtom(); + Atom subAtom = getOutAtomTakingIntoAccountWhetherSetExplicitly(substituentBR, 0); state.fragManager.createBond(ideAtom, subAtom, 1); substituentBR.removeOutAtom(0); state.fragManager.incorporateFragment(ideFrag, subAtom.getFrag()); } } - private void buildFunctionalClassEster(BuildState state, List words) throws StructureBuildingException { - if (!words.get(0).getAttributeValue(TYPE_ATR).equals(WordType.full.toString())){ + private void buildFunctionalClassEster(List words) throws StructureBuildingException { + Element firstWord = words.get(0); + if (!firstWord.getAttributeValue(TYPE_ATR).equals(WordType.full.toString())) { throw new StructureBuildingException("Don't alter wordRules.xml without checking the consequences!"); } - resolveWordOrBracket(state, words.get(0));//the group - BuildResults acidBr = new 
BuildResults(state, words.get(0)); + resolveWordOrBracket(state, firstWord);//the group + BuildResults acidBr = new BuildResults(firstWord); - if (acidBr.getFunctionalAtomCount()==0){ + if (acidBr.getFunctionalAtomCount()==0) { throw new StructureBuildingException("No functionalAtoms detected!"); } - int wordCountMinus1 = words.size() -1; + int wordCountMinus1 = words.size() - 1; if (wordCountMinus1 < 2 || !words.get(wordCountMinus1).getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())) { throw new StructureBuildingException("OPSIN Bug: Bug in functionalClassEster rule; 'ester' not found where it was expected"); } @@ -423,51 +445,48 @@ for (int i = 1; i < wordCountMinus1; i++) { Element currentWord = words.get(i); String wordType = currentWord.getAttributeValue(TYPE_ATR); - if (!wordType.equals(WordType.substituent.toString())){ - if (wordType.equals(WordType.functionalTerm.toString()) && currentWord.getAttributeValue(VALUE_ATR).equalsIgnoreCase("ester")){ + if (!wordType.equals(WordType.substituent.toString())) { + if (wordType.equals(WordType.functionalTerm.toString()) && currentWord.getAttributeValue(VALUE_ATR).equalsIgnoreCase("ester")) { //superfluous ester word continue; } throw new StructureBuildingException("OPSIN Bug: Bug in functionalClassEster rule; Encountered: " + currentWord.getAttributeValue(VALUE_ATR)); } - if (acidBr.getFunctionalAtomCount()==0){ + resolveWordOrBracket(state, currentWord); + BuildResults substituentBr = new BuildResults(currentWord); + int outAtomCount = substituentBr.getOutAtomCount(); + if (acidBr.getFunctionalAtomCount() < outAtomCount) { throw new StructureBuildingException("Insufficient functionalAtoms on acid"); } - resolveWordOrBracket(state, currentWord); - BuildResults substituentBr = new BuildResults(state, currentWord); - if (substituentBr.getOutAtomCount() ==1){ + for (int j = 0; j < outAtomCount; j++) { String locantForSubstituent = currentWord.getAttributeValue(LOCANT_ATR); Atom functionalAtom; - if 
(locantForSubstituent!=null){ - functionalAtom =determineFunctionalAtomToUse(locantForSubstituent, acidBr); + if (locantForSubstituent != null) { + functionalAtom = determineFunctionalAtomToUse(locantForSubstituent, acidBr); } else{ - functionalAtom =acidBr.getFunctionalAtom(0); + functionalAtom = acidBr.getFunctionalAtom(0); acidBr.removeFunctionalAtom(0); } - if (substituentBr.getOutAtom(0).getValency()!=1){ + if (substituentBr.getOutAtom(j).getValency() != 1) { throw new StructureBuildingException("Substituent was expected to have only have an outgoing valency of 1"); } - state.fragManager.createBond(functionalAtom,substituentBr.getOutAtomTakingIntoAccountWhetherSetExplicitly(0), 1); - if (functionalAtom.getCharge()==-1){ + state.fragManager.createBond(functionalAtom, getOutAtomTakingIntoAccountWhetherSetExplicitly(substituentBr, j), 1); + if (functionalAtom.getCharge() == -1) { functionalAtom.neutraliseCharge(); } - substituentBr.removeOutAtom(0); - } - else { - throw new StructureBuildingException("Substituent was expected to have one outAtom"); } + substituentBr.removeAllOutAtoms(); } } /** * Handles names like thiophene 1,1-dioxide; carbon dioxide; benzene oxide * Does the same for sulfide/selenide/telluride - * @param state * @param words * @throws StructureBuildingException */ - private void buildOxide(BuildState state, List words) throws StructureBuildingException { + private void buildOxide(List words) throws StructureBuildingException { resolveWordOrBracket(state, words.get(0));//the group List oxideFragments = new ArrayList(); List locantsForOxide =new ArrayList();//often not specified @@ -475,8 +494,8 @@ throw new StructureBuildingException("Oxide functional term not found where expected!"); } Element rightMostGroup; - if (words.get(0).getLocalName().equals(WORDRULE_EL)){//e.g. 
Nicotinic acid N-oxide - List fullWords = XOMTools.getDescendantElementsWithTagNameAndAttribute(words.get(0), WORD_EL, TYPE_ATR, WordType.full.toString()); + if (words.get(0).getName().equals(WORDRULE_EL)){//e.g. Nicotinic acid N-oxide + List fullWords = OpsinTools.getDescendantElementsWithTagNameAndAttribute(words.get(0), WORD_EL, TYPE_ATR, WordType.full.toString()); if (fullWords.size()==0){ throw new StructureBuildingException("OPSIN is entirely unsure where the oxide goes so has decided not to guess"); } @@ -487,7 +506,7 @@ } int numberOfOxygenToAdd =1; - List multipliers =XOMTools.getDescendantElementsWithTagName(words.get(1), MULTIPLIER_EL); + List multipliers =OpsinTools.getDescendantElementsWithTagName(words.get(1), MULTIPLIER_EL); if (multipliers.size() >1){ throw new StructureBuildingException("Expected 0 or 1 multiplier found: " + multipliers.size()); } @@ -497,7 +516,7 @@ } else{ if (ELEMENTARYATOM_SUBTYPE_VAL.equals(rightMostGroup.getAttributeValue(SUBTYPE_ATR))){ - Atom elementaryAtom = state.xmlFragmentMap.get(rightMostGroup).getFirstAtom(); + Atom elementaryAtom = rightMostGroup.getFrag().getFirstAtom(); int charge = elementaryAtom.getCharge(); if (charge >0 && charge %2 ==0){ numberOfOxygenToAdd = charge/2; @@ -510,50 +529,53 @@ } } } - List functionalGroup =XOMTools.getDescendantElementsWithTagName(words.get(1), FUNCTIONALGROUP_EL); + List functionalGroup =OpsinTools.getDescendantElementsWithTagName(words.get(1), FUNCTIONALGROUP_EL); if (functionalGroup.size()!=1){ throw new StructureBuildingException("Expected 1 group element found: " + functionalGroup.size()); } String smilesReplacement = functionalGroup.get(0).getAttributeValue(VALUE_ATR); String labels = functionalGroup.get(0).getAttributeValue(LABELS_ATR); for (int i = 0; i < numberOfOxygenToAdd; i++) { - oxideFragments.add(state.fragManager.buildSMILES(smilesReplacement, FUNCTIONALCLASS_TYPE_VAL, labels)); + oxideFragments.add(state.fragManager.buildSMILES(smilesReplacement, 
FUNCTIONALCLASS_TYPE_VAL, labels != null ? labels : NONE_LABELS_VAL)); } - List locantEls =XOMTools.getDescendantElementsWithTagName(words.get(1), LOCANT_EL); + List locantEls =OpsinTools.getDescendantElementsWithTagName(words.get(1), LOCANT_EL); if (locantEls.size() >1){ throw new StructureBuildingException("Expected 0 or 1 locant elements found: " + locantEls.size()); } if (locantEls.size()==1){ - String[] locants = MATCH_COMMA.split(StringTools.removeDashIfPresent(locantEls.get(0).getValue())); - locantsForOxide.addAll(Arrays.asList(locants)); + String[] locants = StringTools.removeDashIfPresent(locantEls.get(0).getValue()).split(","); + locantsForOxide.addAll(Arrays.asList(locants)); locantEls.get(0).detach(); } if (!locantsForOxide.isEmpty() && locantsForOxide.size()!=oxideFragments.size()){ throw new StructureBuildingException("Mismatch between number of locants and number of oxides specified"); } List orderedPossibleFragments = new ArrayList();//In preference suffixes are substituted onto e.g. 
acetonitrile oxide - Elements suffixEls = ((Element)rightMostGroup.getParent()).getChildElements(SUFFIX_EL); + List suffixEls = rightMostGroup.getParent().getChildElements(SUFFIX_EL); for (int i = suffixEls.size()-1; i >=0; i--) {//suffixes (if any) from right to left Element suffixEl = suffixEls.get(i); - Fragment suffixFrag =state.xmlFragmentMap.get(suffixEl); + Fragment suffixFrag = suffixEl.getFrag(); if (suffixFrag!=null){ orderedPossibleFragments.add(suffixFrag); } } - Fragment groupToModify = state.xmlFragmentMap.get(rightMostGroup);//all the suffixes are actually part of this fragment already + Fragment groupToModify = rightMostGroup.getFrag();//all the suffixes are actually part of this fragment already orderedPossibleFragments.add(groupToModify); mainLoop: for (int i = 0; i < oxideFragments.size(); i++) { Atom oxideAtom = oxideFragments.get(i).getFirstAtom(); if (!locantsForOxide.isEmpty()){ Atom atomToAddOxideTo =groupToModify.getAtomByLocantOrThrow(locantsForOxide.get(i)); - formAppropriateBondToOxideAndAdjustCharges(state, atomToAddOxideTo, oxideAtom); + if (atomToAddOxideTo.getElement() == ChemEl.C && !ELEMENTARYATOM_SUBTYPE_VAL.equals(groupToModify.getSubType())) { + throw new StructureBuildingException("Locant " + locantsForOxide.get(i) + " indicated oxide applied to carbon, but this would lead to hypervalency!"); + } + formAppropriateBondToOxideAndAdjustCharges(atomToAddOxideTo, oxideAtom); } else{ for (Fragment frag : orderedPossibleFragments) { - String subTypeVal = state.xmlFragmentMap.getElement(frag).getAttributeValue(SUBTYPE_ATR); + String subTypeVal = frag.getSubType(); if (ELEMENTARYATOM_SUBTYPE_VAL.equals(subTypeVal)){ Atom elementaryAtom= frag.getFirstAtom(); - formAppropriateBondToOxideAndAdjustCharges(state, elementaryAtom, oxideAtom);//e.g. carbon dioxide + formAppropriateBondToOxideAndAdjustCharges(elementaryAtom, oxideAtom);//e.g. 
carbon dioxide int chargeOnAtom =elementaryAtom.getCharge(); if (chargeOnAtom>=2){ elementaryAtom.setCharge(chargeOnAtom-2); @@ -563,8 +585,8 @@ else{ List atomList = frag.getAtomList(); for (Atom atom : atomList) { - if (!atom.getElement().equals("C") && !atom.getElement().equals("O")){ - formAppropriateBondToOxideAndAdjustCharges(state, atom, oxideAtom); + if (atom.getElement() != ChemEl.C && atom.getElement() != ChemEl.O){ + formAppropriateBondToOxideAndAdjustCharges(atom, oxideAtom); continue mainLoop; } } @@ -573,7 +595,7 @@ //No heteroatoms could be found. Perhaps it's supposed to be something like styrene oxide Set bondSet = groupToModify.getBondSet();//looking for double bond for (Bond bond : bondSet) { - if (bond.getOrder()==2 && bond.getFromAtom().getElement().equals("C") && bond.getToAtom().getElement().equals("C")){ + if (bond.getOrder()==2 && bond.getFromAtom().getElement() == ChemEl.C && bond.getToAtom().getElement() == ChemEl.C){ bond.setOrder(1); state.fragManager.createBond(bond.getFromAtom(), oxideAtom, 1); state.fragManager.createBond(bond.getToAtom(), oxideAtom, 1); @@ -585,7 +607,7 @@ for (Bond bond : bondSet) { Atom fromAtom =bond.getFromAtom(); Atom toAtom = bond.getToAtom(); - if (fromAtom.hasSpareValency() && toAtom.hasSpareValency() &&fromAtom.getElement().equals("C") && toAtom.getElement().equals("C")){ + if (fromAtom.hasSpareValency() && toAtom.hasSpareValency() &&fromAtom.getElement() == ChemEl.C && toAtom.getElement() == ChemEl.C){ fromAtom.setSpareValency(false); toAtom.setSpareValency(false); state.fragManager.createBond(fromAtom, oxideAtom, 1); @@ -596,8 +618,8 @@ for (Fragment frag : orderedPossibleFragments) {//something like where oxide goes on an oxygen propan-2-one oxide List atomList = frag.getAtomList(); for (Atom atom : atomList) { - if (!atom.getElement().equals("C")){ - formAppropriateBondToOxideAndAdjustCharges(state, atom, oxideAtom); + if (atom.getElement() != ChemEl.C){ + 
formAppropriateBondToOxideAndAdjustCharges(atom, oxideAtom); continue mainLoop; } } @@ -613,12 +635,11 @@ /** * Decides whether an oxide should double bond e.g. P=O or single bond as a zwitterionic form e.g. [N+]-[O-] * Corrects the charges if necessary and forms the bond - * @param state * @param atomToAddOxideTo * @param oxideAtom * @throws StructureBuildingException */ - private void formAppropriateBondToOxideAndAdjustCharges(BuildState state, Atom atomToAddOxideTo, Atom oxideAtom) throws StructureBuildingException { + private void formAppropriateBondToOxideAndAdjustCharges(Atom atomToAddOxideTo, Atom oxideAtom) throws StructureBuildingException { Integer maxVal = ValencyChecker.getMaximumValency(atomToAddOxideTo.getElement(), atomToAddOxideTo.getCharge()); if (maxVal ==null || (atomToAddOxideTo.getIncomingValency() + atomToAddOxideTo.getOutValency() +2) <= maxVal){ if (atomToAddOxideTo.getLambdaConventionValency()==null || !ValencyChecker.checkValencyAvailableForBond(atomToAddOxideTo, 2)){//probably in well formed names 2 protons should always be added but some names use the lambdaConvention to specify the valency after oxide has been applied @@ -640,7 +661,7 @@ } } - private void buildCarbonylDerivative(BuildState state, List words) throws StructureBuildingException { + private void buildCarbonylDerivative(List words) throws StructureBuildingException { if (!WordType.full.toString().equals(words.get(0).getAttributeValue(TYPE_ATR))){ throw new StructureBuildingException("OPSIN bug: Wrong word type encountered when applying carbonylDerivative wordRule"); } @@ -648,15 +669,15 @@ List locantForFunctionalTerm =new ArrayList();//usually not specified if (!words.get(1).getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())){//e.g. 
acetone O-ethyloxime or acetone 1-chloro-1-methylhydrazone for (int i = 1; i < words.size(); i++) { - Fragment frag = state.xmlFragmentMap.get(findRightMostGroupInWordOrWordRule(words.get(i))); + Fragment frag = findRightMostGroupInWordOrWordRule(words.get(i)).getFrag(); replacementFragments.add(frag); - Elements children =words.get(i).getChildElements(); - if (children.size()==1 && children.get(0).getLocalName().equals(BRACKET_EL) && children.get(0).getAttribute(LOCANT_ATR)!=null){ + List children =words.get(i).getChildElements(); + if (children.size()==1 && children.get(0).getName().equals(BRACKET_EL) && children.get(0).getAttribute(LOCANT_ATR)!=null){ locantForFunctionalTerm.add(children.get(0).getAttributeValue(LOCANT_ATR)); } else if (children.size()==2 && children.get(0).getAttribute(LOCANT_ATR)!=null ){ String locant =children.get(0).getAttributeValue(LOCANT_ATR); - if (children.get(1).getLocalName().equals(ROOT_EL) && !frag.hasLocant(locant) && MATCH_NUMERIC_LOCANT.matcher(locant).matches()){ //e.g. 1,3-benzothiazole-2-carbaldehyde 2-phenylhydrazone + if (children.get(1).getName().equals(ROOT_EL) && !frag.hasLocant(locant) && MATCH_NUMERIC_LOCANT.matcher(locant).matches()){ //e.g. 1,3-benzothiazole-2-carbaldehyde 2-phenylhydrazone locantForFunctionalTerm.add(children.get(0).getAttributeValue(LOCANT_ATR)); children.get(0).removeAttribute(children.get(0).getAttribute(LOCANT_ATR)); } @@ -665,7 +686,7 @@ } else{//e.g. 
butan-2,3-dione dioxime or hexan2,3-dione 2-oxime int numberOfCarbonylReplacements =1; - List multipliers =XOMTools.getDescendantElementsWithTagName(words.get(1), MULTIPLIER_EL); + List multipliers =OpsinTools.getDescendantElementsWithTagName(words.get(1), MULTIPLIER_EL); if (multipliers.size() >1){ throw new StructureBuildingException("Expected 0 or 1 multiplier found: " + multipliers.size()); } @@ -673,14 +694,14 @@ numberOfCarbonylReplacements = Integer.parseInt(multipliers.get(0).getAttributeValue(VALUE_ATR)); multipliers.get(0).detach(); } - List functionalGroup =XOMTools.getDescendantElementsWithTagName(words.get(1), FUNCTIONALGROUP_EL); + List functionalGroup =OpsinTools.getDescendantElementsWithTagName(words.get(1), FUNCTIONALGROUP_EL); if (functionalGroup.size()!=1){ throw new StructureBuildingException("Expected 1 functionalGroup element found: " + functionalGroup.size()); } String smilesReplacement = functionalGroup.get(0).getAttributeValue(VALUE_ATR); String labels = functionalGroup.get(0).getAttributeValue(LABELS_ATR); for (int i = 0; i < numberOfCarbonylReplacements; i++) { - Fragment replacementFragment = state.fragManager.buildSMILES(smilesReplacement, FUNCTIONALCLASS_TYPE_VAL, labels); + Fragment replacementFragment = state.fragManager.buildSMILES(smilesReplacement, FUNCTIONALCLASS_TYPE_VAL, labels != null ? 
labels : NONE_LABELS_VAL); if (i >0){ FragmentTools.relabelLocants(replacementFragment.getAtomList(), StringTools.multiplyString("'", i)); } @@ -690,13 +711,13 @@ } replacementFragments.add(replacementFragment); } - List locantEls =XOMTools.getDescendantElementsWithTagName(words.get(1), LOCANT_EL); + List locantEls =OpsinTools.getDescendantElementsWithTagName(words.get(1), LOCANT_EL); if (locantEls.size() >1){ throw new StructureBuildingException("Expected 0 or 1 locant elements found: " + locantEls.size()); } - if (locantEls.size()==1){ - String[] locants = MATCH_COMMA.split(StringTools.removeDashIfPresent(locantEls.get(0).getValue())); - locantForFunctionalTerm.addAll(Arrays.asList(locants)); + if (locantEls.size() == 1) { + String[] locants = StringTools.removeDashIfPresent(locantEls.get(0).getValue()).split(","); + locantForFunctionalTerm.addAll(Arrays.asList(locants)); locantEls.get(0).detach(); } } @@ -705,35 +726,35 @@ } Element rightMostGroup = findRightMostGroupInWordOrWordRule(words.get(0)); - Element parent = (Element) rightMostGroup.getParent(); + Element parent = rightMostGroup.getParent(); boolean multiplied =false; while (!parent.equals(words.get(0))){ if (parent.getAttribute(MULTIPLIER_ATR)!=null){ multiplied =true; } - parent =(Element) parent.getParent(); + parent = parent.getParent(); } if (!multiplied){ - List carbonylOxygens = findCarbonylOxygens(state.xmlFragmentMap.get(rightMostGroup), locantForFunctionalTerm); + List carbonylOxygens = findCarbonylOxygens(rightMostGroup.getFrag(), locantForFunctionalTerm); int replacementsToPerform = Math.min(replacementFragments.size(), carbonylOxygens.size()); - replaceCarbonylOxygenWithReplacementFragments(state, words, replacementFragments, carbonylOxygens, replacementsToPerform); + replaceCarbonylOxygenWithReplacementFragments(words, replacementFragments, carbonylOxygens, replacementsToPerform); } resolveWordOrBracket(state, words.get(0));//the component if (replacementFragments.size() >0){ //Note that 
the right most group may be multiplied e.g. 3,3'-methylenebis(2,4,6-trimethylbenzaldehyde) disemicarbazone //or the carbonyl may not even be on the right most group e.g. 4-oxocyclohexa-2,5-diene-1-carboxylic acid 4-oxime - BuildResults br = new BuildResults(state, words.get(0)); + BuildResults br = new BuildResults(words.get(0)); List carbonylOxygens = new ArrayList(); List fragments = new ArrayList(br.getFragments()); for (ListIterator iterator = fragments.listIterator(fragments.size()); iterator.hasPrevious();) {//iterate in reverse order - right most groups preferred carbonylOxygens.addAll(findCarbonylOxygens(iterator.previous(), locantForFunctionalTerm)); } - replaceCarbonylOxygenWithReplacementFragments(state, words, replacementFragments, carbonylOxygens, replacementFragments.size()); + replaceCarbonylOxygenWithReplacementFragments(words, replacementFragments, carbonylOxygens, replacementFragments.size()); } } - private void replaceCarbonylOxygenWithReplacementFragments(BuildState state, List words, List replacementFragments, List carbonylOxygens, int functionalReplacementsToPerform) throws StructureBuildingException { + private void replaceCarbonylOxygenWithReplacementFragments(List words, List replacementFragments, List carbonylOxygens, int functionalReplacementsToPerform) throws StructureBuildingException { if (functionalReplacementsToPerform > carbonylOxygens.size()){ throw new StructureBuildingException("Insufficient carbonyl groups found!"); } @@ -742,10 +763,14 @@ Fragment carbonylFrag = carbonylOxygen.getFrag(); Fragment replacementFrag = replacementFragments.remove(0); List atomList = replacementFrag.getAtomList(); - Atom atomToReplaceCarbonylOxygen = atomList.get(atomList.size()-1); - Atom numericLocantAtomConnectedToCarbonyl = OpsinTools.depthFirstSearchForAtomWithNumericLocant(carbonylOxygen); - if (numericLocantAtomConnectedToCarbonyl!=null){ - atomList.get(0).addLocant(atomList.get(0).getElement() + 
numericLocantAtomConnectedToCarbonyl.getFirstLocant());//adds a locant like O1 giving another way of referencing this atom + if (atomList.size() == 2){ + //special case for oxime + //adds a locant like O1 giving another way of referencing this atom + Atom numericLocantAtomConnectedToCarbonyl = OpsinTools.depthFirstSearchForAtomWithNumericLocant(carbonylOxygen); + if (numericLocantAtomConnectedToCarbonyl != null) { + Atom lastatom = atomList.get(1); + lastatom.addLocant(lastatom.getElement().toString() + numericLocantAtomConnectedToCarbonyl.getFirstLocant()); + } } if (!words.get(1).getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())){ resolveWordOrBracket(state, words.get(1 +i)); @@ -760,9 +785,14 @@ } } } + if (replacementFrag.getOutAtomCount() !=1) { + throw new RuntimeException("OPSIN Bug: Carbonyl replacement fragment expected to have one outatom"); + } + Atom atomToReplaceCarbonylOxygen = replacementFrag.getOutAtom(0).getAtom(); + replacementFrag.removeOutAtom(0); state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(carbonylOxygen, atomToReplaceCarbonylOxygen); atomToReplaceCarbonylOxygen.setType(carbonylOxygen.getType());//copy the type e.g. 
if the carbonyl was a suffix this should appear as a suffix - if (state.xmlFragmentMap.getElement(replacementFrag)==null){//incorporate only for the case that replacementFrag came from a functional class element + if (replacementFrag.getTokenEl().getParent() == null) {//incorporate only for the case that replacementFrag came from a functional class element state.fragManager.incorporateFragment(replacementFrag, carbonylFrag); } } @@ -780,10 +810,10 @@ List matches = new ArrayList(); List rootFragAtomList = fragment.getAtomList(); for (Atom atom : rootFragAtomList) {//find all carbonyl oxygen - if (atom.getElement().equals("O") && atom.getCharge()==0){ + if (atom.getElement() == ChemEl.O && atom.getCharge()==0){ List neighbours =atom.getAtomNeighbours(); if (neighbours.size()==1){ - if (neighbours.get(0).getElement().equals("C")){ + if (neighbours.get(0).getElement() == ChemEl.C){ if (!locantForCarbonylAtom.isEmpty()){ Atom numericLocantAtomConnectedToCarbonyl = OpsinTools.depthFirstSearchForAtomWithNumericLocant(atom); if (numericLocantAtomConnectedToCarbonyl!=null){//could be the carbon of the carbonyl or the ring the carbonyl connects to in say a carbaldehyde @@ -812,18 +842,18 @@ return matches; } - private void buildAnhydride(BuildState state, List words) throws StructureBuildingException { + private void buildAnhydride(List words) throws StructureBuildingException { if (words.size()!=2 && words.size()!=3){ throw new StructureBuildingException("Unexpected number of words in anhydride. 
Check wordRules.xml, this is probably a bug"); } Element anhydrideWord = words.get(words.size()-1); - List functionalClass =XOMTools.getDescendantElementsWithTagName(anhydrideWord, FUNCTIONALGROUP_EL); + List functionalClass =OpsinTools.getDescendantElementsWithTagName(anhydrideWord, FUNCTIONALGROUP_EL); if (functionalClass.size()!=1){ throw new StructureBuildingException("Expected 1 group element found: " + functionalClass.size()); } String anhydrideSmiles = functionalClass.get(0).getAttributeValue(VALUE_ATR); int numberOfAnhydrideLinkages =1; - List multipliers =XOMTools.getDescendantElementsWithTagName(anhydrideWord, MULTIPLIER_EL); + List multipliers =OpsinTools.getDescendantElementsWithTagName(anhydrideWord, MULTIPLIER_EL); if (multipliers.size() >1){ throw new StructureBuildingException("Expected 0 or 1 multiplier found: " + multipliers.size()); } @@ -832,7 +862,7 @@ multipliers.get(0).detach(); } String anhydrideLocant = null; - List anhydrideLocants =XOMTools.getDescendantElementsWithTagNames(anhydrideWord, new String[]{LOCANT_EL, COLONORSEMICOLONDELIMITEDLOCANT_EL}); + List anhydrideLocants =OpsinTools.getDescendantElementsWithTagNames(anhydrideWord, new String[]{LOCANT_EL, COLONORSEMICOLONDELIMITEDLOCANT_EL}); if (anhydrideLocants.size() >1){ throw new StructureBuildingException("Expected 0 or 1 anhydrideLocants found: " + anhydrideLocants.size()); } @@ -841,7 +871,7 @@ anhydrideLocants.get(0).detach(); } resolveWordOrBracket(state, words.get(0)); - BuildResults br1 = new BuildResults(state, words.get(0)); + BuildResults br1 = new BuildResults(words.get(0)); if (br1.getFunctionalAtomCount() ==0){ throw new StructureBuildingException("Cannot find functionalAtom to form anhydride"); } @@ -850,7 +880,7 @@ throw new StructureBuildingException("Unsupported or invalid anhydride"); } resolveWordOrBracket(state, words.get(1)); - BuildResults br2 = new BuildResults(state, words.get(1)); + BuildResults br2 = new BuildResults(words.get(1)); if 
(br2.getFunctionalAtomCount() ==0){ throw new StructureBuildingException("Cannot find functionalAtom to form anhydride"); } @@ -862,13 +892,13 @@ BuildResults newAcidBr; if (i!=0){ Element newAcid = state.fragManager.cloneElement(state, words.get(0)); - XOMTools.insertAfter(words.get(0), newAcid); - newAcidBr = new BuildResults(state, newAcid); + OpsinTools.insertAfter(words.get(0), newAcid); + newAcidBr = new BuildResults(newAcid); } else{ newAcidBr =br1; } - formAnhydrideLink(state, anhydrideSmiles, newAcidBr, br2); + formAnhydrideLink(anhydrideSmiles, newAcidBr, br2); } } @@ -876,7 +906,7 @@ if (br1.getFunctionalAtomCount()!=1 && br2.getFunctionalAtomCount()!=1 ) { throw new StructureBuildingException("Invalid anhydride description"); } - formAnhydrideLink(state, anhydrideSmiles, br1, br2); + formAnhydrideLink(anhydrideSmiles, br1, br2); } } else{//symmetric anhydride @@ -885,7 +915,7 @@ if (numberOfAnhydrideLinkages!=1 || anhydrideLocant !=null ){ throw new StructureBuildingException("Unsupported or invalid anhydride"); } - formAnhydrideLink(state, anhydrideSmiles, br1, br1); + formAnhydrideLink(anhydrideSmiles, br1, br1); } else{//cyclic anhydride where group has more than 2 acids if (anhydrideLocant ==null){ @@ -904,7 +934,7 @@ } for (int i = 0; i < numberOfAnhydrideLinkages; i++) { - String[] locants = MATCH_COMMA.split(acidLocants[i]); + String[] locants = acidLocants[i].split(","); Atom oxygen1 =null; for (int j = functionalAtoms.size() -1; j >=0; j--) { Atom functionalAtom = functionalAtoms.get(j); @@ -928,7 +958,7 @@ if (oxygen1 ==null || oxygen2==null){ throw new StructureBuildingException("Unable to find locanted atom for anhydride formation"); } - formAnhydrideLink(state, anhydrideSmiles, oxygen1, oxygen2); + formAnhydrideLink(anhydrideSmiles, oxygen1, oxygen2); } } } @@ -937,39 +967,37 @@ throw new StructureBuildingException("Unsupported or invalid anhydride"); } Element newAcid = state.fragManager.cloneElement(state, words.get(0)); - 
XOMTools.insertAfter(words.get(0), newAcid); - BuildResults br2 = new BuildResults(state, newAcid); - formAnhydrideLink(state, anhydrideSmiles, br1, br2); + OpsinTools.insertAfter(words.get(0), newAcid); + BuildResults br2 = new BuildResults(newAcid); + formAnhydrideLink(anhydrideSmiles, br1, br2); } } } /** * Given buildResults for both the acids and the SMILES of the anhydride forms the anhydride bond using the first functionalAtom on each BuildResults - * @param state * @param anhydrideSmiles * @param acidBr1 * @param acidBr2 * @throws StructureBuildingException */ - private void formAnhydrideLink(BuildState state, String anhydrideSmiles, BuildResults acidBr1, BuildResults acidBr2)throws StructureBuildingException { + private void formAnhydrideLink(String anhydrideSmiles, BuildResults acidBr1, BuildResults acidBr2)throws StructureBuildingException { Atom oxygen1 = acidBr1.getFunctionalAtom(0); acidBr1.removeFunctionalAtom(0); Atom oxygen2 = acidBr2.getFunctionalAtom(0); acidBr2.removeFunctionalAtom(0); - formAnhydrideLink(state, anhydrideSmiles, oxygen1, oxygen2); + formAnhydrideLink(anhydrideSmiles, oxygen1, oxygen2); } /** * Given two atoms and the SMILES of the anhydride forms the anhydride bond - * @param state * @param anhydrideSmiles * @param oxygen1 * @param oxygen2 * @throws StructureBuildingException */ - private void formAnhydrideLink(BuildState state, String anhydrideSmiles, Atom oxygen1, Atom oxygen2)throws StructureBuildingException { - if (!oxygen1.getElement().equals("O")||!oxygen2.getElement().equals("O") || oxygen1.getBonds().size()!=1 ||oxygen2.getBonds().size()!=1) { + private void formAnhydrideLink(String anhydrideSmiles, Atom oxygen1, Atom oxygen2)throws StructureBuildingException { + if (oxygen1.getElement() != ChemEl.O || oxygen2.getElement() != ChemEl.O || oxygen1.getBondCount()!=1 ||oxygen2.getBondCount()!=1) { throw new StructureBuildingException("Problem building anhydride"); } Atom atomOnSecondAcidToConnectTo = 
oxygen2.getAtomNeighbours().get(0); @@ -982,12 +1010,12 @@ state.fragManager.incorporateFragment(anhydride, acidFragment1); } - private void buildAcidHalideOrPseudoHalide(BuildState state, List words) throws StructureBuildingException { + private void buildAcidHalideOrPseudoHalide(List words) throws StructureBuildingException { if (!words.get(0).getAttributeValue(TYPE_ATR).equals(WordType.full.toString())){ throw new StructureBuildingException("Don't alter wordRules.xml without checking the consequences!"); } resolveWordOrBracket(state, words.get(0)); - BuildResults acidBr = new BuildResults(state, words.get(0)); + BuildResults acidBr = new BuildResults(words.get(0)); int functionalAtomCount =acidBr.getFunctionalAtomCount(); if (functionalAtomCount==0){ throw new StructureBuildingException("No functionalAtoms detected!"); @@ -995,23 +1023,23 @@ boolean monoMultiplierDetected =false; List functionalGroupFragments = new ArrayList(); - for (int i=1; i functionalGroups = XOMTools.getDescendantElementsWithTagName(functionalGroupWord, FUNCTIONALGROUP_EL); + List functionalGroups = OpsinTools.getDescendantElementsWithTagName(functionalGroupWord, FUNCTIONALGROUP_EL); if (functionalGroups.size()!=1){ throw new StructureBuildingException("Expected exactly 1 functionalGroup. Found " + functionalGroups.size()); } Fragment monoValentFunctionGroup =state.fragManager.buildSMILES(functionalGroups.get(0).getAttributeValue(VALUE_ATR), FUNCTIONALCLASS_TYPE_VAL, NONE_LABELS_VAL); if (functionalGroups.get(0).getAttributeValue(TYPE_ATR).equals(MONOVALENTSTANDALONEGROUP_TYPE_VAL)){ - Atom ideAtom = monoValentFunctionGroup.getDefaultInAtom(); + Atom ideAtom = monoValentFunctionGroup.getDefaultInAtomOrFirstAtom(); ideAtom.addChargeAndProtons(1, 1);//e.g. 
make cyanide charge netural } - Element possibleMultiplier = (Element) XOMTools.getPreviousSibling(functionalGroups.get(0)); + Element possibleMultiplier = OpsinTools.getPreviousSibling(functionalGroups.get(0)); functionalGroupFragments.add(monoValentFunctionGroup); - if (possibleMultiplier!=null){ + if (possibleMultiplier != null){ int multiplierValue = Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR)); - if (multiplierValue==1){ + if (multiplierValue == 1) { monoMultiplierDetected = true; } for (int j = 1; j < multiplierValue; j++) { @@ -1021,14 +1049,22 @@ } } int halideCount = functionalGroupFragments.size(); - if (halideCount > functionalAtomCount || (!monoMultiplierDetected && halideCount functionalAtomCount || (!monoMultiplierDetected && halideCount =0; i--) { Fragment ideFrag =functionalGroupFragments.get(i); - Atom ideAtom = ideFrag.getDefaultInAtom(); + Atom ideAtom = ideFrag.getDefaultInAtomOrFirstAtom(); Atom acidAtom = acidBr.getFunctionalAtom(i); - if (!acidAtom.getElement().equals("O")){ + if (acidAtom.getElement() != ChemEl.O){ throw new StructureBuildingException("Atom type expected to be oxygen but was: " +acidAtom.getElement()); } acidBr.removeFunctionalAtom(i); @@ -1038,50 +1074,52 @@ } } - private void buildAdditionCompound(BuildState state, List words) throws StructureBuildingException { - if (!words.get(0).getAttributeValue(TYPE_ATR).equals(WordType.full.toString())){ + private void buildAdditionCompound(List words) throws StructureBuildingException { + Element firstWord = words.get(0); + if (!firstWord.getAttributeValue(TYPE_ATR).equals(WordType.full.toString())) { throw new StructureBuildingException("Don't alter wordRules.xml without checking the consequences!"); } - resolveWordOrBracket(state, words.get(0)); - Element elementaryAtomEl = StructureBuildingMethods.findRightMostGroupInBracket(words.get(0)); - Fragment elementaryAtomFrag = state.xmlFragmentMap.get(elementaryAtomEl); + resolveWordOrBracket(state, firstWord); + 
Element elementaryAtomEl = StructureBuildingMethods.findRightMostGroupInBracket(firstWord); + Fragment elementaryAtomFrag = elementaryAtomEl.getFrag(); Atom elementaryAtom = elementaryAtomFrag.getFirstAtom(); int charge = elementaryAtom.getCharge(); List functionalGroupFragments = new ArrayList(); - for (int i=1; i functionalGroups = XOMTools.getDescendantElementsWithTagName(functionalGroupWord, FUNCTIONALGROUP_EL); - if (functionalGroups.size()!=1){ + for (int i = 1; i < words.size(); i++ ) { + Element functionalGroupWord = words.get(i); + List functionalGroups = OpsinTools.getDescendantElementsWithTagName(functionalGroupWord, FUNCTIONALGROUP_EL); + if (functionalGroups.size() != 1){ throw new StructureBuildingException("Expected exactly 1 functionalGroup. Found " + functionalGroups.size()); } + Element functionGroup = functionalGroups.get(0); - Fragment monoValentFunctionGroup =state.fragManager.buildSMILES(functionalGroups.get(0).getAttributeValue(VALUE_ATR), FUNCTIONALCLASS_TYPE_VAL, NONE_LABELS_VAL); - if (functionalGroups.get(0).getAttributeValue(TYPE_ATR).equals(MONOVALENTSTANDALONEGROUP_TYPE_VAL)){ - Atom ideAtom = monoValentFunctionGroup.getDefaultInAtom(); + Fragment monoValentFunctionGroup = state.fragManager.buildSMILES(functionGroup.getAttributeValue(VALUE_ATR), FUNCTIONALCLASS_TYPE_VAL, NONE_LABELS_VAL); + if (functionGroup.getAttributeValue(TYPE_ATR).equals(MONOVALENTSTANDALONEGROUP_TYPE_VAL)){ + Atom ideAtom = monoValentFunctionGroup.getDefaultInAtomOrFirstAtom(); ideAtom.addChargeAndProtons(1, 1);//e.g. 
make cyanide charge netural } - Element possibleMultiplier = (Element) XOMTools.getPreviousSibling(functionalGroups.get(0)); + Element possibleMultiplier = OpsinTools.getPreviousSibling(functionGroup); functionalGroupFragments.add(monoValentFunctionGroup); - if (possibleMultiplier!=null){ + if (possibleMultiplier != null) { int multiplierValue = Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR)); for (int j = 1; j < multiplierValue; j++) { functionalGroupFragments.add(state.fragManager.copyFragment(monoValentFunctionGroup)); } possibleMultiplier.detach(); } - else if (words.size()==2){//silicon chloride -->silicon tetrachloride - int incomingBondOrder =elementaryAtom.getIncomingValency(); + else if (words.size() == 2) {//silicon chloride -->silicon tetrachloride + int incomingBondOrder = elementaryAtom.getIncomingValency(); int expectedValency; - if (charge > 0){ + if (charge > 0) { expectedValency = incomingBondOrder + charge; } else{ - if (elementaryAtom.getProperty(Atom.OXIDATION_NUMBER)!=null){ + if (elementaryAtom.getProperty(Atom.OXIDATION_NUMBER) != null) { expectedValency = elementaryAtom.getProperty(Atom.OXIDATION_NUMBER); } else{ - if (elementaryAtomEl.getAttribute(COMMONOXIDATIONSTATESANDMAX_ATR)!=null){ - String[] typicalOxidationStates = MATCH_COMMA.split(MATCH_COLON.split(elementaryAtomEl.getAttributeValue(COMMONOXIDATIONSTATESANDMAX_ATR))[0]); + if (elementaryAtomEl.getAttribute(COMMONOXIDATIONSTATESANDMAX_ATR) != null) { + String[] typicalOxidationStates = elementaryAtomEl.getAttributeValue(COMMONOXIDATIONSTATESANDMAX_ATR).split(":")[0].split(","); expectedValency = Integer.parseInt(typicalOxidationStates[0]); } else{ @@ -1089,33 +1127,76 @@ } } } - int implicitMultiplier = expectedValency -incomingBondOrder >1 ? expectedValency -incomingBondOrder : 1; + int implicitMultiplier = expectedValency - incomingBondOrder > 1 ? 
expectedValency - incomingBondOrder : 1; for (int j = 1; j < implicitMultiplier; j++) { functionalGroupFragments.add(state.fragManager.copyFragment(monoValentFunctionGroup)); } } } - int halideCount = functionalGroupFragments.size(); - if (charge>0){ - elementaryAtom.setCharge(charge - halideCount); + if (charge > 0) { + elementaryAtom.setCharge(charge - functionalGroupFragments.size()); } + + //[AlH3] --> [AlH4-] , [AlH4] --> [AlH4-] + applyAluminiumHydrideSpecialCase(firstWord, elementaryAtom, functionalGroupFragments); + + int halideCount = functionalGroupFragments.size(); Integer maximumVal = ValencyChecker.getMaximumValency(elementaryAtom.getElement(), elementaryAtom.getCharge()); - if (maximumVal!=null && halideCount > maximumVal){ + if (maximumVal != null && halideCount > maximumVal) { throw new StructureBuildingException("Too many halides/psuedo halides addded to " +elementaryAtom.getElement()); } - for (int i = halideCount - 1; i>=0; i--) { - Fragment ideFrag =functionalGroupFragments.get(i); - Atom ideAtom = ideFrag.getDefaultInAtom(); + for (int i = halideCount - 1; i >= 0; i--) { + Fragment ideFrag = functionalGroupFragments.get(i); + Atom ideAtom = ideFrag.getDefaultInAtomOrFirstAtom(); state.fragManager.incorporateFragment(ideFrag, ideAtom, elementaryAtomFrag, elementaryAtom, 1); } } + + private void applyAluminiumHydrideSpecialCase(Element firstWord, Atom elementaryAtom, + List functionalGroupFragments) throws StructureBuildingException { + if ((elementaryAtom.getElement() == ChemEl.Al || elementaryAtom.getElement() == ChemEl.B) + && elementaryAtom.getCharge() == 0) { + if (functionalGroupFragments.size() == 3) { + if (functionalGroupFragments.get(0).getDefaultInAtomOrFirstAtom().getElement() == ChemEl.H + && functionalGroupFragments.get(1).getDefaultInAtomOrFirstAtom().getElement() == ChemEl.H + && functionalGroupFragments.get(2).getDefaultInAtomOrFirstAtom().getElement() == ChemEl.H) { + Element counterCationWordRule = 
OpsinTools.getPreviousSibling(firstWord.getParent()); + if (counterCationWordRule != null && counterCationWordRule.getChildCount() == 1) { + Element word =counterCationWordRule.getFirstChildElement(WORD_EL); + if (word != null && word.getChildCount() ==1) { + Element root = word.getFirstChildElement(ROOT_EL); + if (root != null && root.getChildCount() ==1) { + Element group = root.getFirstChildElement(GROUP_EL); + if (group != null && ELEMENTARYATOM_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))) { + ChemEl chemEl = group.getFrag().getFirstAtom().getElement(); + if (chemEl == ChemEl.Li || chemEl == ChemEl.Na || chemEl == ChemEl.K || chemEl == ChemEl.Rb || chemEl == ChemEl.Cs) { + functionalGroupFragments.add(state.fragManager.copyFragment(functionalGroupFragments.get(0))); + elementaryAtom.setCharge(-1); + } + } + } + } + } + + } + } + else if (functionalGroupFragments.size() == 4) { + if (functionalGroupFragments.get(0).getDefaultInAtomOrFirstAtom().getElement() == ChemEl.H + && functionalGroupFragments.get(1).getDefaultInAtomOrFirstAtom().getElement() == ChemEl.H + && functionalGroupFragments.get(2).getDefaultInAtomOrFirstAtom().getElement() == ChemEl.H + && functionalGroupFragments.get(3).getDefaultInAtomOrFirstAtom().getElement() == ChemEl.H) { + elementaryAtom.setCharge(-1); + } + } + } + } - private void buildGlycol(BuildState state, List words) throws StructureBuildingException { + private void buildGlycol(List words) throws StructureBuildingException { int wordIndice = 0; resolveWordOrBracket(state, words.get(wordIndice));//the group Element finalGroup = findRightMostGroupInWordOrWordRule(words.get(wordIndice)); - Fragment theDiRadical = state.xmlFragmentMap.get(finalGroup); + Fragment theDiRadical = finalGroup.getFrag(); if (theDiRadical.getOutAtomCount()!=2){ throw new StructureBuildingException("Glycol class names (e.g. ethylene glycol) expect two outAtoms. 
Found: " + theDiRadical.getOutAtomCount() ); } @@ -1123,25 +1204,27 @@ if (wordIndice >= words.size() || !words.get(wordIndice).getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())){ throw new StructureBuildingException("Glycol functionalTerm word expected"); } - List functionalClassEls = XOMTools.getDescendantElementsWithTagName(words.get(wordIndice), FUNCTIONALCLASS_EL); + List functionalClassEls = OpsinTools.getDescendantElementsWithTagName(words.get(wordIndice), FUNCTIONALCLASS_EL); if (functionalClassEls.size()!=1){ throw new StructureBuildingException("Glycol functional class not found where expected"); } - Atom outAtom1 = theDiRadical.getAtomOrNextSuitableAtomOrThrow(theDiRadical.getOutAtom(0).getAtom(), theDiRadical.getOutAtom(0).getValency(), false); + OutAtom outAtom1 = theDiRadical.getOutAtom(0); + Atom chosenAtom1 = outAtom1.isSetExplicitly() ? outAtom1.getAtom() : findAtomForUnlocantedRadical(state, theDiRadical, outAtom1); Fragment functionalFrag =state.fragManager.buildSMILES(functionalClassEls.get(0).getAttributeValue(VALUE_ATR), FUNCTIONALCLASS_TYPE_VAL, NONE_LABELS_VAL); - if (theDiRadical.getOutAtom(0).getValency() !=1){ - throw new StructureBuildingException("OutAtom has unexpected valency. Expected 1. Actual: " + theDiRadical.getOutAtom(0).getValency()); + if (outAtom1.getValency() != 1){ + throw new StructureBuildingException("OutAtom has unexpected valency. Expected 1. Actual: " + outAtom1.getValency()); } - state.fragManager.createBond(outAtom1, functionalFrag.getFirstAtom(), 1); + state.fragManager.createBond(chosenAtom1, functionalFrag.getFirstAtom(), 1); state.fragManager.incorporateFragment(functionalFrag, theDiRadical); - Atom outAtom2 = theDiRadical.getAtomOrNextSuitableAtomOrThrow(theDiRadical.getOutAtom(1).getAtom(), theDiRadical.getOutAtom(1).getValency(), false); + OutAtom outAtom2 = theDiRadical.getOutAtom(1); + Atom chosenAtom2 = outAtom2.isSetExplicitly() ? 
outAtom2.getAtom() : findAtomForUnlocantedRadical(state, theDiRadical, outAtom2); Fragment hydroxy =state.fragManager.buildSMILES("O", FUNCTIONALCLASS_TYPE_VAL, NONE_LABELS_VAL); - if (theDiRadical.getOutAtom(1).getValency() !=1){ - throw new StructureBuildingException("OutAtom has unexpected valency. Expected 1. Actual: " + theDiRadical.getOutAtom(1).getValency()); + if (outAtom2.getValency() != 1){ + throw new StructureBuildingException("OutAtom has unexpected valency. Expected 1. Actual: " + outAtom2.getValency()); } - state.fragManager.createBond(outAtom2, hydroxy.getFirstAtom(), 1); + state.fragManager.createBond(chosenAtom2, hydroxy.getFirstAtom(), 1); state.fragManager.incorporateFragment(hydroxy, theDiRadical); theDiRadical.removeOutAtom(1); theDiRadical.removeOutAtom(0); @@ -1153,13 +1236,13 @@ * triethylene glycol n-butyl ether * tripropylene glycol methyl ether * dipropylene glycol methyl ether acetate - * @param state * @param words * @throws StructureBuildingException */ - private void buildGlycolEther(BuildState state, List words) throws StructureBuildingException { - List wordsToAttachToGlcyol = new ArrayList(); + private void buildGlycolEther(List words) throws StructureBuildingException { + List wordsToAttachToGlycol = new ArrayList(); Element glycol =words.get(0); + resolveWordOrBracket(state, glycol);//if this actually is something like ethylene glycol this is a no-op as it will already have been resolved if (!glycol.getAttributeValue(TYPE_ATR).equals(WordType.full.toString())){ throw new StructureBuildingException("OPSIN Bug: Cannot find glycol word!"); } @@ -1167,47 +1250,39 @@ Element wordOrWordRule =words.get(i); //ether ignored if (!wordOrWordRule.getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())){ - wordsToAttachToGlcyol.add(wordOrWordRule); + resolveWordOrBracket(state, wordOrWordRule);//the substituent to attach + wordsToAttachToGlycol.add(wordOrWordRule); } else if 
(!wordOrWordRule.getAttributeValue(VALUE_ATR).equalsIgnoreCase("ether")){ throw new StructureBuildingException("Unexpected word encountered when applying glycol ether word rule " + wordOrWordRule.getAttributeValue(VALUE_ATR)); } } - if (wordsToAttachToGlcyol.size() !=1 && wordsToAttachToGlcyol.size() !=2 ){ - throw new StructureBuildingException("Unexpected number of substituents for glycol ether. Expected 1 or 2 found: " +wordsToAttachToGlcyol.size()); + int numOfEthers = wordsToAttachToGlycol.size(); + if (numOfEthers == 0) { + throw new StructureBuildingException("OPSIN Bug: Unexpected number of substituents for glycol ether"); } Element finalGroup = findRightMostGroupInWordOrWordRule(glycol); - Fragment theDiRadical = state.xmlFragmentMap.get(finalGroup); - List atomList = theDiRadical.getAtomList(); - List glycolAtoms = new ArrayList(); - for (Atom atom : atomList) { - if (atom.getElement().equals("O")&& atom.getType().equals(FUNCTIONALCLASS_TYPE_VAL)){ - glycolAtoms.add(atom); - } - } - if (glycolAtoms.size()!=2){ - throw new StructureBuildingException("OPSIN bug: unable to find the two glycol oxygens"); - } - BuildResults br1 = new BuildResults(state, wordsToAttachToGlcyol.get(0)); - if (br1.getOutAtomCount() ==0){ - throw new StructureBuildingException("Substituent had no outAtom to form glycol ether"); - } - state.fragManager.createBond(glycolAtoms.get(0), br1.getOutAtom(0).getAtom(), 1); - br1.removeOutAtom(0); - if (wordsToAttachToGlcyol.size()==2){ - BuildResults br2 = new BuildResults(state, wordsToAttachToGlcyol.get(1)); - if (br2.getOutAtomCount() >0){//form ether - state.fragManager.createBond(glycolAtoms.get(1), br2.getOutAtom(0).getAtom(), 1); - br2.removeOutAtom(0); + List hydroxyAtoms = FragmentTools.findHydroxyGroups(finalGroup.getFrag()); + if (hydroxyAtoms.size() == 0) { + throw new StructureBuildingException("No hydroxy groups found in: " + finalGroup.getValue() + " to form ether"); + } + if (hydroxyAtoms.size() < numOfEthers) { + throw new 
StructureBuildingException("Insufficient hydroxy groups found in: " + finalGroup.getValue() + " to form required number of ethers"); + } + for (int i = 0; i < numOfEthers; i++) { + BuildResults br = new BuildResults(wordsToAttachToGlycol.get(i)); + if (br.getOutAtomCount() >0){//form ether + state.fragManager.createBond(hydroxyAtoms.get(i), br.getOutAtom(0).getAtom(), 1); + br.removeOutAtom(0); } - else if (br2.getFunctionalAtomCount() >0){//form ester - Atom ateAtom = br2.getFunctionalAtom(0); + else if (br.getFunctionalAtomCount() >0){//form ester + Atom ateAtom = br.getFunctionalAtom(0); ateAtom.neutraliseCharge(); - state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(glycolAtoms.get(1), br2.getFunctionalAtom(0)); - br2.removeFunctionalAtom(0); + state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(hydroxyAtoms.get(i), br.getFunctionalAtom(0)); + br.removeFunctionalAtom(0); } else{ - throw new StructureBuildingException("Word had neither an outAtom or a functionalAtom! hence neither and ether or ester could be formed : " + wordsToAttachToGlcyol.get(1).getAttributeValue(VALUE_ATR)); + throw new StructureBuildingException("Word had neither an outAtom or a functionalAtom! 
hence neither and ether or ester could be formed : " + wordsToAttachToGlycol.get(i).getAttributeValue(VALUE_ATR)); } } } @@ -1215,18 +1290,17 @@ /** * Builds acetals/ketals/hemiacetals/hemiketals and chalcogen analogues * The distinction between acetals and ketals is not enforced (ketals are a subset of acetals) - * @param state * @param words * @throws StructureBuildingException */ - private void buildAcetal(BuildState state, List words) throws StructureBuildingException { + private void buildAcetal(List words) throws StructureBuildingException { for (int i = 0; i < words.size()-1; i++) { resolveWordOrBracket(state, words.get(i)); } BuildResults substituentsBr = new BuildResults(); for (int i = 1; i < words.size()-1; i++) { Element currentWord = words.get(i); - BuildResults substituentBr = new BuildResults(state, currentWord); + BuildResults substituentBr = new BuildResults(currentWord); int outAtomCount = substituentBr.getOutAtomCount(); if (outAtomCount ==1){ String locantForSubstituent = currentWord.getAttributeValue(LOCANT_ATR); @@ -1240,37 +1314,26 @@ substituentsBr.mergeBuildResults(substituentBr); } Element rightMostGroup = findRightMostGroupInWordOrWordRule(words.get(0)); - Fragment rootFragment = state.xmlFragmentMap.get(rightMostGroup);//the group which will be modified + Fragment rootFragment = rightMostGroup.getFrag();//the group which will be modified List carbonylOxygen= findCarbonylOxygens(rootFragment, new ArrayList()); Element functionalWord = words.get(words.size()-1); - List functionalClasses = XOMTools.getDescendantElementsWithTagName(functionalWord, FUNCTIONALCLASS_EL); + List functionalClasses = OpsinTools.getDescendantElementsWithTagName(functionalWord, FUNCTIONALCLASS_EL); if (functionalClasses.size()!=1){ throw new StructureBuildingException("OPSIN bug: unable to find acetal functionalClass"); } Element functionalClassEl = functionalClasses.get(0); String functionalClass = functionalClassEl.getValue(); - Element beforeAcetal = (Element) 
XOMTools.getPreviousSibling(functionalClassEl); + Element beforeAcetal = OpsinTools.getPreviousSibling(functionalClassEl); int numberOfAcetals =1; - List elements = null; - if (beforeAcetal!=null){ - if (beforeAcetal.getLocalName().equals(MULTIPLIER_EL)){ + String[] elements = functionalClassEl.getAttributeValue(VALUE_ATR).split(","); + if (beforeAcetal != null){ + if (beforeAcetal.getName().equals(MULTIPLIER_EL)){ numberOfAcetals = Integer.parseInt(beforeAcetal.getAttributeValue(VALUE_ATR)); } else{ - elements = determineChalcogenReplacementOfAcetal(functionalClassEl); - if (elements.size()>2){ - throw new StructureBuildingException(functionalClass + " only has two oxygen"); - } - if (elements.size()==1){ - elements.add("O"); - } + replaceChalcogensInAcetal(functionalClassEl, elements); } } - if (elements==null){ - elements = new ArrayList(); - elements.add("O"); - elements.add("O"); - } if (carbonylOxygen.size() < numberOfAcetals){ throw new StructureBuildingException("Insufficient carbonyls to form " + numberOfAcetals +" " + functionalClass ); @@ -1278,41 +1341,50 @@ boolean hemiacetal = functionalClass.contains("hemi"); List acetalFrags = new ArrayList(); for (int i = 0; i < numberOfAcetals; i++) { - acetalFrags.add(formAcetal(state, carbonylOxygen, elements)); + acetalFrags.add(formAcetal(carbonylOxygen, elements)); } int bondsToForm = hemiacetal ? 
numberOfAcetals : 2*numberOfAcetals; if (substituentsBr.getOutAtomCount()!=bondsToForm){ throw new StructureBuildingException("incorrect number of susbtituents when forming " + functionalClass); } - connectSubstituentsToAcetal(state, acetalFrags, substituentsBr, hemiacetal); + connectSubstituentsToAcetal(acetalFrags, substituentsBr, hemiacetal); } - private List determineChalcogenReplacementOfAcetal(Element functionalClassEl) throws StructureBuildingException { - Element currentEl = (Element) functionalClassEl.getParent().getChild(0); - int multiplier =1; - List elements = new ArrayList(); - while(currentEl !=functionalClassEl){ - if (currentEl.getLocalName().equals(MULTIPLIER_EL)){ - multiplier = Integer.parseInt(currentEl.getAttributeValue(VALUE_ATR)); - } - else if (currentEl.getLocalName().equals(GROUP_EL)){ - for (int i = 0; i < multiplier; i++) { - elements.add(currentEl.getAttributeValue(VALUE_ATR)); + private void replaceChalcogensInAcetal(Element functionalClassEl, String[] elements) throws StructureBuildingException { + Element currentEl = functionalClassEl.getParent().getChild(0); + int multiplier = 1; + if (currentEl.getName().equals(MULTIPLIER_EL)){ + multiplier = Integer.parseInt(currentEl.getAttributeValue(VALUE_ATR)); + if (multiplier > 2){ + throw new StructureBuildingException(functionalClassEl.getValue() + " only has two oxygen!"); + } + currentEl = OpsinTools.getNextSibling(currentEl); + } + int i = 0; + while(currentEl != functionalClassEl) { + if (currentEl.getName().equals(GROUP_EL)) { + for (int j = 0; j < multiplier; j++) { + if (i == 2) { + throw new StructureBuildingException(functionalClassEl.getValue() + " only has two oxygen!"); + } + if (!elements[i].equals("O")){ + throw new StructureBuildingException("Replacement on " + functionalClassEl.getValue() + " can only be used to replace oxygen!"); + } + elements[i++] = currentEl.getAttributeValue(VALUE_ATR); } } - else{ + else { throw new StructureBuildingException("Unexpected element 
before acetal"); } - currentEl =(Element) XOMTools.getNextSibling(currentEl); + currentEl = OpsinTools.getNextSibling(currentEl); } - return elements; } - private Fragment formAcetal(BuildState state, List carbonylOxygen, List elements) throws StructureBuildingException { + private Fragment formAcetal(List carbonylOxygen, String[] elements) throws StructureBuildingException { Atom neighbouringCarbon = carbonylOxygen.get(0).getAtomNeighbours().get(0); state.fragManager.removeAtomAndAssociatedBonds(carbonylOxygen.get(0)); carbonylOxygen.remove(0); - Fragment acetalFrag = state.fragManager.buildSMILES(StringTools.stringListToString(elements, "."),"",NONE_LABELS_VAL); + Fragment acetalFrag = state.fragManager.buildSMILES(StringTools.arrayToString(elements, "."),"",NONE_LABELS_VAL); FragmentTools.assignElementLocants(acetalFrag, new ArrayList()); List acetalAtomList = acetalFrag.getAtomList(); Atom atom1 = acetalAtomList.get(0); @@ -1323,69 +1395,185 @@ return acetalFrag; } - private void buildBiochemicalEster(BuildState state, List words, int numberOfWordRules) throws StructureBuildingException { + private boolean buildAlcoholEster(List words, int numberOfWordRules) throws StructureBuildingException { for (Element word : words) { if (!WordType.full.toString().equals(word.getAttributeValue(TYPE_ATR))){ - throw new StructureBuildingException("Bug in word rule for biochemicalEster"); + throw new StructureBuildingException("Bug in word rule for potentialAlcoholEster"); } resolveWordOrBracket(state, word); } int ateWords = words.size() -1; if (ateWords < 1){ - throw new StructureBuildingException("Bug in word rule for biochemicalEster"); + throw new StructureBuildingException("Bug in word rule for potentialAlcoholEster"); } - Fragment biochemicalFragment = state.xmlFragmentMap.get(findRightMostGroupInWordOrWordRule(words.get(0))); - List hydroxyAtoms = FragmentTools.findHydroxyGroups(biochemicalFragment); - boolean ambiguous = ateWords != hydroxyAtoms.size(); + Fragment 
potentialAlcoholFragment = findRightMostGroupInWordOrWordRule(words.get(0)).getFrag(); + List hydroxyAtoms = FragmentTools.findHydroxyGroups(potentialAlcoholFragment); + List chosenHydroxyAtoms = new ArrayList(); + List ateBuildResults = new ArrayList(); for (int i = 1; i < words.size(); i++) { Element ateWord = words.get(i); - String locant = ateWord.getAttributeValue(LOCANT_ATR); - - Atom atomOnBiochemicalFragment; - if (locant!=null){ - atomOnBiochemicalFragment = biochemicalFragment.getAtomByLocantOrThrow(locant); - if (atomOnBiochemicalFragment.getBonds().size()!=1){ - atomOnBiochemicalFragment = biochemicalFragment.getAtomByLocantOrThrow("O" + locant); + BuildResults wordBr = new BuildResults(ateWord); + if (isAppropriateAteGroupForAlcoholEster(ateWord, wordBr)) { + String locant = ateWord.getAttributeValue(LOCANT_ATR); + if (locant != null) { + Atom atomOnAlcoholFragment = potentialAlcoholFragment.getAtomByLocantOrThrow(locant); + if (!hydroxyAtoms.contains(atomOnAlcoholFragment) || chosenHydroxyAtoms.contains(atomOnAlcoholFragment)) { + atomOnAlcoholFragment = potentialAlcoholFragment.getAtomByLocantOrThrow("O" + locant); + } + if (!hydroxyAtoms.contains(atomOnAlcoholFragment) || chosenHydroxyAtoms.contains(atomOnAlcoholFragment)) { + throw new StructureBuildingException(locant + " did not point to a hydroxy group to be used for ester formation"); + } + chosenHydroxyAtoms.add(atomOnAlcoholFragment); + } + else if (words.size() == 2) { + //special case for adenosine triphosphate and the like + //guess that locant might be 5' + Atom atomOnAlcoholFragment = potentialAlcoholFragment.getAtomByLocant("O5'"); + if (hydroxyAtoms.contains(atomOnAlcoholFragment)) { + chosenHydroxyAtoms.add(atomOnAlcoholFragment); + } } + ateBuildResults.add(wordBr); } - else{ - atomOnBiochemicalFragment = biochemicalFragment.getAtomByLocant("O5'");//take a guess at it being 5' ;-) - if (atomOnBiochemicalFragment==null && !ambiguous && hydroxyAtoms.size() > 0){ - 
atomOnBiochemicalFragment = hydroxyAtoms.get(0); - } - } - BuildResults br = new BuildResults(state, ateWord); - if (atomOnBiochemicalFragment != null){ - hydroxyAtoms.remove(atomOnBiochemicalFragment); - String element = atomOnBiochemicalFragment.getElement(); - if (atomOnBiochemicalFragment.getBonds().size()!=1 || (!element.equals("O") && !element.equals("S") && !element.equals("Se") && !element.equals("Te"))){ - throw new StructureBuildingException("Failed to find hydroxy group on biochemical fragment"); - } - if (br.getFunctionalAtomCount()==0){ - throw new StructureBuildingException("Unable to find functional atom to form biochemical ester"); + else { + return false; + } + } + + if (chosenHydroxyAtoms.size() < ateWords) { + if (!chosenHydroxyAtoms.isEmpty()) { + throw new RuntimeException("OPSIN Bug: Either all or none of the esters should be locanted in alcohol ester rule"); + } + if (hydroxyAtoms.size() == ateWords || hydroxyAtoms.size() > ateWords && (AmbiguityChecker.allAtomsEquivalent(hydroxyAtoms) || potentialAlcoholFragment.getTokenEl().getValue().equals("glycerol") )) { + for (int i = 0; i < ateWords; i++) { + chosenHydroxyAtoms.add(hydroxyAtoms.get(i)); } - Atom functionalAtom =br.getFunctionalAtom(0); - br.removeFunctionalAtom(0); - functionalAtom.neutraliseCharge(); - - state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(functionalAtom, atomOnBiochemicalFragment); } - + else { + return false; + } + } + + for (int i = 0; i < ateWords; i++) { + BuildResults br = ateBuildResults.get(i); + Element ateWord = words.get(i + 1); Element ateGroup = findRightMostGroupInWordOrWordRule(ateWord); - if (ateGroup.getAttribute(NUMBEROFFUNCTIONALATOMSTOREMOVE_ATR)==null && numberOfWordRules==1){ - //by convention [O-] are implicitly converted to [OH] to balance charge - for (int j = br.getFunctionalAtomCount() -1; j>=0; j--) { - Atom atomToDefunctionalise =br.getFunctionalAtom(j); + if (ateGroup.getAttribute(NUMBEROFFUNCTIONALATOMSTOREMOVE_ATR) == 
null && numberOfWordRules == 1) { + //by convention [O-] are implicitly converted to [OH] when phosphates/sulfates are attached + //If word rules is > 1 this will be done or not done as part of charge balancing + for (int j = br.getFunctionalAtomCount() -1; j >= 1; j--) { + Atom atomToDefunctionalise = br.getFunctionalAtom(j); br.removeFunctionalAtom(j); atomToDefunctionalise.neutraliseCharge(); } } + Atom functionalAtom = br.getFunctionalAtom(0); + br.removeFunctionalAtom(0); + functionalAtom.neutraliseCharge(); + state.fragManager.replaceAtomWithAnotherAtomPreservingConnectivity(functionalAtom, chosenHydroxyAtoms.get(i)); + } + return true; + } + + private void buildAmineDiConjunctiveSuffix(List words) throws StructureBuildingException { + for (Element word : words) { + if (!WordType.full.toString().equals(word.getAttributeValue(TYPE_ATR))){ + throw new StructureBuildingException("Bug in word rule for amineDiConjunctiveSuffix"); + } + resolveWordOrBracket(state, word); + } + if (words.size() != 3) { + throw new StructureBuildingException("Unexpected number of words encountered when processing name of type amineDiConjunctiveSuffix, expected 3 but found: " + words.size()); + } + Element aminoAcid = findRightMostGroupInWordOrWordRule(words.get(0)); + if (aminoAcid == null) { + throw new RuntimeException("OPSIN Bug: failed to find amino acid"); + } + Atom amineAtom = aminoAcid.getFrag().getDefaultInAtom(); + if (amineAtom == null) { + throw new StructureBuildingException("OPSIN did not know where the amino acid amine was located"); + } + + for (int i = 1; i < words.size(); i++) { + Element word = words.get(i); + Fragment suffixLikeGroup = findRightMostGroupInWordOrWordRule(word).getFrag(); + String locant = word.getAttributeValue(LOCANT_ATR); + if (locant != null){ + if (!locant.equals("N")) { + throw new RuntimeException("OPSIN Bug: locant expected to be N but was: " + locant); + } + } + Atom atomToConnectToOnConjunctiveFrag = 
FragmentTools.lastNonSuffixCarbonWithSufficientValency(suffixLikeGroup); + if (atomToConnectToOnConjunctiveFrag == null) { + throw new StructureBuildingException("OPSIN Bug: Unable to find non suffix carbon with sufficient valency"); + } + state.fragManager.createBond(atomToConnectToOnConjunctiveFrag, amineAtom, 1); } } - private void connectSubstituentsToAcetal(BuildState state, List acetalFrags, BuildResults subBr, boolean hemiacetal) throws StructureBuildingException { + private static final Pattern matchCommonCarboxylicSalt = Pattern.compile("tri-?fluoro-?acetate?$", Pattern.CASE_INSENSITIVE); + private static final Pattern matchCommonEsterFormingInorganicSalt = Pattern.compile("(ortho-?)?(bor|phosphor|phosphate?|phosphite?)|carbam|carbon|sulfur|sulfate?|sulfite?|diphosphate?|triphosphate?", Pattern.CASE_INSENSITIVE); + + /** + * CAS endorses the use of ...ol ...ate names means esters + * but only for cases involving "common acids": + * Acetic acid; Benzenesulfonic acid; Benzenesulfonic acid, 4-methyl-; Benzoic acid and its monoamino, mononitro, and dinitro derivatives; + * Boric acid (H3BO3); Carbamic acid; Carbamic acid, N-methyl-; Carbamic acid, N-phenyl-; Carbonic acid; Formic acid; Methanesulfonic acid; + * Nitric acid; Phosphoric acid; Phosphorodithioic acid; Phosphorothioic acid; Phosphorous acid; Propanoic acid; Sulfuric acid; and Sulfurous acid. + * ...unless the alcohol component is also common. + * + * As in practice a lot of use won't be from CAS names we use the following heuristic: + * Is locanted OR + * Has 1 functional atom (And not common salt e.g. 
Trifluoroacetate) OR + * common phosphorus/sulfur ate including di/tri phosphate + * @param ateWord + * @param wordBr + * @return + * @throws StructureBuildingException + */ + private boolean isAppropriateAteGroupForAlcoholEster(Element ateWord, BuildResults wordBr) throws StructureBuildingException { + if (wordBr.getFunctionalAtomCount() > 0) { + if (ateWord.getAttributeValue(LOCANT_ATR) != null) { + //locanted, so locant must be used for this purpose + return true; + } + if (wordBr.getFunctionalAtomCount() == 1) { + if (matchCommonCarboxylicSalt.matcher(ateWord.getAttributeValue(VALUE_ATR)).find()) { + return false; + } + return true; + } + String ateGroupText = findRightMostGroupInWordOrWordRule(ateWord).getValue(); + //e.g. triphosphate + if (matchCommonEsterFormingInorganicSalt.matcher(ateGroupText).matches()) { + return true; + } + + } + return false; + } + + private void splitAlcoholEsterRuleIntoTwoSimpleWordRules(List words) { + Element firstGroup = words.get(0); + Element wordRule = firstGroup.getParent(); + wordRule.getAttribute(WORDRULE_ATR).setValue(WordRule.simple.toString()); + wordRule.getAttribute(VALUE_ATR).setValue(firstGroup.getAttributeValue(VALUE_ATR)); + + Element newWordRule = new GroupingEl(WORDRULE_EL); + newWordRule.addAttribute(TYPE_ATR, WordType.full.toString()); + newWordRule.addAttribute(WORDRULE_ATR, WordRule.simple.toString()); + newWordRule.addAttribute(VALUE_ATR, words.get(1).getAttributeValue(VALUE_ATR)); + OpsinTools.insertAfter(wordRule, newWordRule); + for (int i = 1; i < words.size(); i++) { + Element word = words.get(i); + word.detach(); + newWordRule.addChild(word); + } + } + + private void connectSubstituentsToAcetal(List acetalFrags, BuildResults subBr, boolean hemiacetal) throws StructureBuildingException { Map usageMap= new HashMap(); for (int i = subBr.getOutAtomCount() -1; i>=0; i--) { OutAtom out = subBr.getOutAtom(i); @@ -1398,11 +1586,11 @@ Atom a 
=OpsinTools.depthFirstSearchForNonSuffixAtomWithLocant(possibleAcetalFrag.getFirstAtom(), out.getLocant()); if (a!=null){ List atomList = possibleAcetalFrag.getAtomList(); - if (atomList.get(0).getBonds().size()==1){ + if (atomList.get(0).getBondCount()==1){ atomToUse = atomList.get(0); break; } - else if (atomList.get(1).getBonds().size()==1){ + else if (atomList.get(1).getBondCount()==1){ atomToUse = atomList.get(1); break; } @@ -1419,10 +1607,10 @@ } else{ List atomList = acetalFrags.get(0).getAtomList(); - if (atomList.get(0).getBonds().size()==1){ + if (atomList.get(0).getBondCount()==1){ atomToUse = atomList.get(0); } - else if (atomList.get(1).getBonds().size()==1){ + else if (atomList.get(1).getBondCount()==1){ atomToUse = atomList.get(1); } else{ @@ -1440,20 +1628,20 @@ } } - private void buildCyclicPeptide(BuildState state, List words) throws StructureBuildingException { + private void buildCyclicPeptide(List words) throws StructureBuildingException { if (words.size() != 2){ throw new StructureBuildingException("OPSIN Bug: Expected 2 words in cyclic peptide name, found: " + words.size()); } Element peptide = words.get(1); resolveWordOrBracket(state, peptide); - BuildResults peptideBr = new BuildResults(state, peptide); + BuildResults peptideBr = new BuildResults(peptide); if (peptideBr.getOutAtomCount() ==1){ - Atom outAtom =peptideBr.getOutAtomTakingIntoAccountWhetherSetExplicitly(0); - List aminoAcids = XOMTools.getDescendantElementsWithTagNameAndAttribute(peptide, GROUP_EL, TYPE_ATR, AMINOACID_TYPE_VAL); + Atom outAtom = getOutAtomTakingIntoAccountWhetherSetExplicitly(peptideBr, 0); + List aminoAcids = OpsinTools.getDescendantElementsWithTagNameAndAttribute(peptide, GROUP_EL, TYPE_ATR, AMINOACID_TYPE_VAL); if (aminoAcids.size() < 2){ throw new StructureBuildingException("Cyclic peptide building failed: Requires at least two amino acids!"); } - Atom inAtom = state.xmlFragmentMap.get(aminoAcids.get(0)).getDefaultInAtom(); + Atom inAtom = 
aminoAcids.get(0).getFrag().getDefaultInAtomOrFirstAtom(); state.fragManager.createBond(outAtom, inAtom, peptideBr.getOutAtom(0).getValency()); peptideBr.removeAllOutAtoms(); @@ -1463,33 +1651,33 @@ } } - private List buildPolymer(BuildState state, List words) throws StructureBuildingException { + private void buildPolymer(List words) throws StructureBuildingException { if (words.size()!=2){ throw new StructureBuildingException("Currently unsupported polymer name type"); } Element polymer = words.get(1); resolveWordOrBracket(state, polymer); - BuildResults polymerBr = new BuildResults(state, polymer); - List rGroups = new ArrayList(); + BuildResults polymerBr = new BuildResults(polymer); if (polymerBr.getOutAtomCount() ==2){ - Atom inAtom =polymerBr.getOutAtomTakingIntoAccountWhetherSetExplicitly(0); - Atom outAtom =polymerBr.getOutAtomTakingIntoAccountWhetherSetExplicitly(1); + Atom inAtom = getOutAtomTakingIntoAccountWhetherSetExplicitly(polymerBr, 0); + Atom outAtom = getOutAtomTakingIntoAccountWhetherSetExplicitly(polymerBr, 1); /* * We assume the polymer repeats so as an approximation we create an R group with the same element as the group at the other end of polymer (with valency equal to the bondorder of the Rgroup so no H added) */ - Fragment rGroup1 =state.fragManager.buildSMILES("[" + outAtom.getElement() + "|" + polymerBr.getOutAtom(0).getValency() + "]", "", "alpha"); - state.fragManager.createBond(inAtom, rGroup1.getFirstAtom(), polymerBr.getOutAtom(0).getValency()); - - Fragment rGroup2 =state.fragManager.buildSMILES("[" + inAtom.getElement() + "|" + polymerBr.getOutAtom(1).getValency() + "]", "", "omega"); - state.fragManager.createBond(outAtom, rGroup2.getFirstAtom(), polymerBr.getOutAtom(1).getValency()); - rGroups.add(rGroup1); - rGroups.add(rGroup2); + Atom rGroup1 =state.fragManager.buildSMILES("[" + outAtom.getElement().toString() + "|" + polymerBr.getOutAtom(0).getValency() + "]", "", "alpha").getFirstAtom(); + 
rGroup1.setProperty(Atom.ATOM_CLASS, 1); + state.fragManager.createBond(inAtom, rGroup1, polymerBr.getOutAtom(0).getValency()); + + Atom rGroup2 =state.fragManager.buildSMILES("[" + inAtom.getElement().toString() + "|" + polymerBr.getOutAtom(1).getValency() + "]", "", "omega").getFirstAtom(); + rGroup2.setProperty(Atom.ATOM_CLASS, 2); + state.fragManager.createBond(outAtom, rGroup2, polymerBr.getOutAtom(1).getValency()); + polymerAttachmentPoints.add(rGroup1); + polymerAttachmentPoints.add(rGroup2); polymerBr.removeAllOutAtoms(); } else{ throw new StructureBuildingException("Polymer building failed: Two termini were not found; Expected 2 outAtoms, found: " +polymerBr.getOutAtomCount()); } - return rGroups; } /** @@ -1506,8 +1694,9 @@ Atom possibleAtom = mainGroupBR.getFunctionalAtom(i); if (possibleAtom.hasLocant(locant)){ mainGroupBR.removeFunctionalAtom(i); - if (possibleAtom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){ - possibleAtom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT).remove(possibleAtom); + Set degenerateAtoms = possibleAtom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT); + if (degenerateAtoms != null){ + degenerateAtoms.remove(possibleAtom); } return possibleAtom; } @@ -1518,8 +1707,9 @@ Atom possibleAtom = mainGroupBR.getFunctionalAtom(i); if (OpsinTools.depthFirstSearchForNonSuffixAtomWithLocant(possibleAtom, locant)!=null){ mainGroupBR.removeFunctionalAtom(i); - if (possibleAtom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){ - possibleAtom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT).remove(possibleAtom); + Set degenerateAtoms = possibleAtom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT); + if (degenerateAtoms != null){ + degenerateAtoms.remove(possibleAtom); } return possibleAtom; } @@ -1532,38 +1722,38 @@ boolean isElementSymbol = MATCH_ELEMENT_SYMBOL.matcher(locant).matches(); for (int i = 0; i < mainGroupBR.getFunctionalAtomCount(); i++) { Atom possibleAtom = mainGroupBR.getFunctionalAtom(i); - if 
(possibleAtom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT)!=null){ - Set atoms =possibleAtom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT); - boolean foundAtom = false; - for (Atom a : atoms) { - if (a.hasLocant(locant) || (isElementSymbol && a.getElement().equals(locant))){ - //swap locants and element type - List tempLocants = new ArrayList(a.getLocants()); - List tempLocants2 = new ArrayList(possibleAtom.getLocants()); - a.clearLocants(); - possibleAtom.clearLocants(); - for (String l : tempLocants) { - possibleAtom.addLocant(l); - } - for (String l : tempLocants2) { - a.addLocant(l); - } - String originalElement = possibleAtom.getElement(); - possibleAtom.setElement(a.getElement()); - a.setElement(originalElement); - mainGroupBR.removeFunctionalAtom(i); - foundAtom =true; - break; - } - } - if (foundAtom){ - atoms.remove(possibleAtom); - return possibleAtom; - } - } - if (isElementSymbol && possibleAtom.getElement().equals(locant)){ + if (isElementSymbol && possibleAtom.getElement().toString().equals(locant)){ mainGroupBR.removeFunctionalAtom(i); - return possibleAtom; + return possibleAtom; + } + Set degenerateAtoms = possibleAtom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT); + if (degenerateAtoms != null){ + boolean foundAtom = false; + for (Atom a : degenerateAtoms) { + if (a.hasLocant(locant) || (isElementSymbol && a.getElement().toString().equals(locant))){ + //swap locants and element type + List tempLocants = new ArrayList(a.getLocants()); + List tempLocants2 = new ArrayList(possibleAtom.getLocants()); + a.clearLocants(); + possibleAtom.clearLocants(); + for (String l : tempLocants) { + possibleAtom.addLocant(l); + } + for (String l : tempLocants2) { + a.addLocant(l); + } + ChemEl originalChemEl = possibleAtom.getElement(); + possibleAtom.setElement(a.getElement()); + a.setElement(originalChemEl); + mainGroupBR.removeFunctionalAtom(i); + foundAtom =true; + break; + } + } + if (foundAtom){ + degenerateAtoms.remove(possibleAtom); + return 
possibleAtom; + } } } } @@ -1572,117 +1762,131 @@ } /** - * Valency is used to determine the expected number of hydrogen - * Hydrogens are then added to bring the number of connections up to the minimum required to satisfy the atom's valency - * This allows the valency of the atom to be encoded e.g. phopshane-3 hydrogen, phosphorane-5 hydrogen. - * It is also neccesary when considering stereochemistry as a hydrogen beats nothing in the CIP rules - * @param state + * Applies explicit stoichiometry, charge balancing and fractional multipliers + * @param molecule + * @param wordRules * @throws StructureBuildingException */ - static void makeHydrogensExplicit(BuildState state) throws StructureBuildingException { - Set fragments = state.fragManager.getFragPile(); - for (Fragment fragment : fragments) { - if (fragment.getSubType().equals(ELEMENTARYATOM_SUBTYPE_VAL)){//these do not have implicit hydrogen e.g. phosphorus is literally just a phosphorus atom - continue; - } - List atomList =fragment.getAtomList(); - for (Atom parentAtom : atomList) { - int explicitHydrogensToAdd = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(parentAtom); - for (int i = 0; i < explicitHydrogensToAdd; i++) { - Atom hydrogen = state.fragManager.createAtom("H", fragment); - state.fragManager.createBond(parentAtom, hydrogen, 1); - } - if (parentAtom.getAtomParity()!=null){ - if (explicitHydrogensToAdd >1){ - //Cannot have tetrahedral chirality and more than 2 hydrogens - parentAtom.setAtomParity(null);//probably caused by deoxy + private void manipulateStoichiometry(Element molecule, List wordRules) throws StructureBuildingException { + boolean explicitStoichiometryPresent = applyExplicitStoichiometryIfProvided(wordRules); + boolean chargedFractionalGroup = false; + List wordRulesWithFractionalMultipliers = new ArrayList(0); + for (Element wordRule : wordRules) { + Element fractionalMultiplier = wordRule.getChild(0); + while (fractionalMultiplier.getChildCount() != 0){ + 
fractionalMultiplier = fractionalMultiplier.getChild(0); + } + if (fractionalMultiplier.getName().equals(FRACTIONALMULTIPLIER_EL)) { + if (explicitStoichiometryPresent) { + throw new StructureBuildingException("Fractional multipliers should not be used in conjunction with explicit stoichiometry"); + } + String[] value = fractionalMultiplier.getAttributeValue(VALUE_ATR).split("/"); + if (value.length != 2) { + throw new RuntimeException("OPSIN Bug: malformed fractional multiplier: " + fractionalMultiplier.getAttributeValue(VALUE_ATR)); + } + try { + int numerator = Integer.parseInt(value[0]); + int denominator = Integer.parseInt(value[1]); + if (denominator != 2) { + throw new RuntimeException("Only fractions of a 1/2 currently supported"); } - else{ - modifyAtomParityToTakeIntoAccountExplicitHydrogen(parentAtom); + for (int j = 1; j < numerator; j++) { + Element clone = state.fragManager.cloneElement(state, wordRule); + OpsinTools.insertAfter(wordRule, clone); + wordRulesWithFractionalMultipliers.add(clone); } } + catch (NumberFormatException e) { + throw new RuntimeException("OPSIN Bug: malformed fractional multiplier: " + fractionalMultiplier.getAttributeValue(VALUE_ATR)); + } + //don't detach the fractional multiplier to avoid charge balancing multiplication (cf. handling of mono) + wordRulesWithFractionalMultipliers.add(wordRule); + if (new BuildResults(wordRule).getCharge() !=0){ + chargedFractionalGroup = true; + } } } - } - - private static void modifyAtomParityToTakeIntoAccountExplicitHydrogen(Atom atom) throws StructureBuildingException { - AtomParity atomParity = atom.getAtomParity(); - if (!StereoAnalyser.isPossiblyStereogenic(atom)){ - //no longer a stereoCentre e.g. 
due to unsaturation - atom.setAtomParity(null); - } - else{ - Atom[] atomRefs4 = atomParity.getAtomRefs4(); - Integer positionOfImplicitHydrogen = null; - Integer positionOfDeoxyHydrogen = null; - for (int i = 0; i < atomRefs4.length; i++) { - if (atomRefs4[i].equals(AtomParity.hydrogen)){ - positionOfImplicitHydrogen = i; - } - else if (atomRefs4[i].equals(AtomParity.deoxyHydrogen)){ - positionOfDeoxyHydrogen = i; - } - } - if (positionOfImplicitHydrogen !=null || positionOfDeoxyHydrogen !=null){ - //atom parity was set in SMILES, the dummy hydrogen atom has now been substituted - List neighbours = atom.getAtomNeighbours(); - for (Atom atomRef : atomRefs4) { - neighbours.remove(atomRef); - } - if (neighbours.size()==0){ - throw new StructureBuildingException("OPSIN Bug: Unable to determine which atom has substituted a hydrogen at stereocentre"); - } - else if (neighbours.size()==1 && positionOfDeoxyHydrogen!=null){ - atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(0); - if (positionOfImplicitHydrogen != null){ - throw new StructureBuildingException("OPSIN Bug: Unable to determine which atom has substituted a hydrogen at stereocentre"); - } - } - else if (neighbours.size()==1 && positionOfImplicitHydrogen!=null){ - atomRefs4[positionOfImplicitHydrogen] = neighbours.get(0); - } - else if (neighbours.size()==2 && positionOfDeoxyHydrogen!=null && positionOfImplicitHydrogen!=null){ - try{ - List cipOrderedAtoms = new CipSequenceRules(atom).getNeighbouringAtomsInCIPOrder(); - //higher priority group replaces the former hydroxy groups (deoxyHydrogen) - if (cipOrderedAtoms.indexOf(neighbours.get(0)) > cipOrderedAtoms.indexOf(neighbours.get(1))){ - atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(0); - atomRefs4[positionOfImplicitHydrogen] = neighbours.get(1); - } - else{ - atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(1); - atomRefs4[positionOfImplicitHydrogen] = neighbours.get(0); - } - } - catch (CipOrderingException e){ - //assume ligands equivalent so it 
makes no difference which is which - atomRefs4[positionOfDeoxyHydrogen] = neighbours.get(0); - atomRefs4[positionOfImplicitHydrogen] = neighbours.get(1); + if (wordRulesWithFractionalMultipliers.size() > 0) { + if (wordRules.size() == 1) { + throw new StructureBuildingException("Unexpected fractional multiplier found at start of word"); + } + if (chargedFractionalGroup) { + for (Element wordRule : wordRules) { + if (wordRulesWithFractionalMultipliers.contains(wordRule)) { + continue; } + Element clone = state.fragManager.cloneElement(state, wordRule); + OpsinTools.insertAfter(wordRule, clone); } - else{ - throw new StructureBuildingException("OPSIN Bug: Unable to determine which atom has substituted a hydrogen at stereocentre"); + } + } + boolean saltExpected = molecule.getAttribute(ISSALT_ATR) != null; + if (saltExpected) { + deprotonateAcidIfSaltWithMetal(molecule); + } + int overallCharge = state.fragManager.getOverallCharge(); + if (overallCharge!=0 && wordRules.size() >1){//a net charge is present! 
Could just mean the counterion has not been specified though + balanceChargeIfPossible(molecule, overallCharge, explicitStoichiometryPresent); + } + if (wordRulesWithFractionalMultipliers.size() > 0 && !chargedFractionalGroup) { + for (Element wordRule : molecule.getChildElements(WORDRULE_EL)) { + if (wordRulesWithFractionalMultipliers.contains(wordRule)) { + continue; } + Element clone = state.fragManager.cloneElement(state, wordRule); + OpsinTools.insertAfter(wordRule, clone); } } + } - private boolean applyExplicitStoichiometryIfProvided(BuildState state, Elements wordRules) throws StructureBuildingException { + private boolean applyExplicitStoichiometryIfProvided(List wordRules) throws StructureBuildingException { boolean explicitStoichiometryPresent =false; - for (int i = 0; i < wordRules.size(); i++) { - Element wordRule = wordRules.get(i); + for (Element wordRule : wordRules) { if (wordRule.getAttribute(STOICHIOMETRY_ATR)!=null){ int stoichiometry = Integer.parseInt(wordRule.getAttributeValue(STOICHIOMETRY_ATR)); wordRule.removeAttribute(wordRule.getAttribute(STOICHIOMETRY_ATR)); for (int j = 1; j < stoichiometry; j++) { Element clone = state.fragManager.cloneElement(state, wordRule); - XOMTools.insertAfter(wordRule, clone); + OpsinTools.insertAfter(wordRule, clone); } explicitStoichiometryPresent =true; } } return explicitStoichiometryPresent; } + + + private void deprotonateAcidIfSaltWithMetal(Element molecule) { + List positivelyChargedComponents = new ArrayList(); + List negativelyChargedComponents = new ArrayList(); + List neutralComponents = new ArrayList(); + List wordRules = molecule.getChildElements(WORDRULE_ATR); + for (Element wordRule : wordRules) { + BuildResults br = new BuildResults(wordRule); + int charge = br.getCharge(); + if (charge > 0) { + positivelyChargedComponents.add(br); + } + else if (charge < 0) { + negativelyChargedComponents.add(br); + } + else { + neutralComponents.add(br); + } + } + if (negativelyChargedComponents.size() == 0 
&& (positivelyChargedComponents.size() > 0 || getMetalsThatCanBeImplicitlyCations(molecule).size() > 0)) { + for (int i = neutralComponents.size() - 1; i>=0; i--) { + BuildResults br =neutralComponents.get(i); + for (int j = br.getFunctionalAtomCount() -1; j >=0; j--) { + Atom functionalAtom = br.getFunctionalAtom(j); + if (functionalAtom.getCharge() == 0 && functionalAtom.getIncomingValency() == 1){ + functionalAtom.addChargeAndProtons(-1, -1); + } + } + } + } + } /** * A net charge is present; Given the molecule element the overallCharge is there an unambiguous way of @@ -1690,46 +1894,38 @@ * metals without specified charge may be given an implicit positive charge * * If this fails look for the case where there are multiple molecules and the mixture is only negative due to negatively charged functional Atoms e.g. pyridine acetate and remove the negative charge - * @param state * @param molecule * @param explicitStoichiometryPresent * @param overallCharge * @throws StructureBuildingException */ - private void balanceChargeIfPossible(BuildState state, Element molecule, int overallCharge, boolean explicitStoichiometryPresent) throws StructureBuildingException { - List wordRules = XOMTools.getChildElementsWithTagName(molecule, WORDRULE_ATR); + private void balanceChargeIfPossible(Element molecule, int overallCharge, boolean explicitStoichiometryPresent) throws StructureBuildingException { + List wordRules = molecule.getChildElements(WORDRULE_ATR); List positivelyChargedComponents = new ArrayList(); List negativelyChargedComponents = new ArrayList(); - HashMap componentToChargeMapping = new HashMap(); - HashMap componentToBR = new HashMap(); + Map componentToChargeMapping = new HashMap(); + Map componentToBR = new HashMap(); - List cationicElements = new ArrayList(); - List elementaryAtoms = XOMTools.getDescendantElementsWithTagNameAndAttribute(molecule, GROUP_EL, SUBTYPE_ATR, ELEMENTARYATOM_SUBTYPE_VAL); - for (Element elementaryAtom : elementaryAtoms) { - if 
(elementaryAtom.getAttribute(COMMONOXIDATIONSTATESANDMAX_ATR)!=null){ - Fragment cationicFrag =state.xmlFragmentMap.get(elementaryAtom); - if (cationicFrag.getFirstAtom().getCharge()==0){//if not 0 charge cannot be implicitly modified - String[] typicalOxidationStates = MATCH_COMMA.split(MATCH_COLON.split(elementaryAtom.getAttributeValue(COMMONOXIDATIONSTATESANDMAX_ATR))[0]); - int typicalCharge = Integer.parseInt(typicalOxidationStates[typicalOxidationStates.length-1]); - if (typicalCharge > cationicFrag.getFirstAtom().getAtomNeighbours().size()){ - cationicElements.add(elementaryAtom); - } - } - } - } - overallCharge = setCationicElementsToTypicalCharge(state, cationicElements, overallCharge); + List cationicElements = getMetalsThatCanBeImplicitlyCations(molecule); + overallCharge = setCationicElementsToTypicalCharge(cationicElements, overallCharge); if (overallCharge==0){ return; } - if (cationicElements.size() ==1 && overallCharge <0){//e.g. nickel tetrachloride [Ni2+]-->[Ni4+] - boolean success = setChargeOnCationicElementAppropriately(state, overallCharge, cationicElements.get(0)); - if (success){ + if (cationicElements.size() ==1 && overallCharge < 0) {//e.g. nickel tetrachloride [Ni2+]-->[Ni4+] + if (setChargeOnCationicElementAppropriately(overallCharge, cationicElements.get(0))) { return; } } + if (overallCharge == -2) { + if (triHalideSpecialCase(wordRules)) { + //e.g. 
three iodides --> triiodide ion + return; + } + } + for (Element wordRule : wordRules) { - BuildResults br = new BuildResults(state, wordRule); + BuildResults br = new BuildResults(wordRule); componentToBR.put(wordRule, br); int charge = br.getCharge(); if (charge>0){ @@ -1740,15 +1936,16 @@ } componentToChargeMapping.put(wordRule, charge); } + if (!explicitStoichiometryPresent && (positivelyChargedComponents.size()==1 && cationicElements.size() ==0 && negativelyChargedComponents.size() >=1 || positivelyChargedComponents.size()>=1 && negativelyChargedComponents.size() ==1 )){ - boolean success = multiplyChargedComponents(state, negativelyChargedComponents, positivelyChargedComponents, componentToChargeMapping, overallCharge); + boolean success = multiplyChargedComponents(negativelyChargedComponents, positivelyChargedComponents, componentToChargeMapping, overallCharge); if (success){ return; } } if (cationicElements.size() ==1){//e.g. magnesium monochloride [Mg2+]-->[Mg+] - boolean success = setChargeOnCationicElementAppropriately(state, overallCharge, cationicElements.get(0)); + boolean success = setChargeOnCationicElementAppropriately(overallCharge, cationicElements.get(0)); if (success){ return; } @@ -1779,19 +1976,36 @@ } } + private List getMetalsThatCanBeImplicitlyCations(Element molecule) { + List cationicElements = new ArrayList(); + List elementaryAtoms = OpsinTools.getDescendantElementsWithTagNameAndAttribute(molecule, GROUP_EL, SUBTYPE_ATR, ELEMENTARYATOM_SUBTYPE_VAL); + for (Element elementaryAtom : elementaryAtoms) { + if (elementaryAtom.getAttribute(COMMONOXIDATIONSTATESANDMAX_ATR)!=null){ + Atom metalAtom = elementaryAtom.getFrag().getFirstAtom(); + if (metalAtom.getCharge() == 0 && metalAtom.getProperty(Atom.OXIDATION_NUMBER) == null) {//if not 0 charge cannot be implicitly modified + String[] typicalOxidationStates = elementaryAtom.getAttributeValue(COMMONOXIDATIONSTATESANDMAX_ATR).split(":")[0].split(","); + int typicalCharge = 
Integer.parseInt(typicalOxidationStates[typicalOxidationStates.length-1]); + if (typicalCharge > metalAtom.getBondCount()){ + cationicElements.add(elementaryAtom); + } + } + } + } + return cationicElements; + } + /** * Sets the cationicElements to the lowest typical charge as specified by the COMMONOXIDATIONSTATESANDMAX_ATR that is >= incoming valency * The valency incoming to the cationicElement is taken into account e.g. phenylmagnesium chloride is [Mg+] - * @param state * @param cationicElements * @param overallCharge * @return */ - private int setCationicElementsToTypicalCharge(BuildState state, List cationicElements, int overallCharge) { + private int setCationicElementsToTypicalCharge(List cationicElements, int overallCharge) { for (Element cationicElement : cationicElements) { - Fragment cationicFrag = state.xmlFragmentMap.get(cationicElement); - String[] typicalOxidationStates = MATCH_COMMA.split(MATCH_COLON.split(cationicElement.getAttributeValue(COMMONOXIDATIONSTATESANDMAX_ATR))[0]); + Fragment cationicFrag = cationicElement.getFrag(); + String[] typicalOxidationStates = cationicElement.getAttributeValue(COMMONOXIDATIONSTATESANDMAX_ATR).split(":")[0].split(","); int incomingValency = cationicFrag.getFirstAtom().getIncomingValency(); for (String typicalOxidationState : typicalOxidationStates) { int charge = Integer.parseInt(typicalOxidationState); @@ -1805,11 +2019,40 @@ } return overallCharge; } + + /** + * Checks for tribromide/triodide and joins the ions if found + * @param wordRules + * @return + */ + private boolean triHalideSpecialCase(List wordRules) { + for (Element wordRule : wordRules) { + if (wordRule.getChildCount() == 3) { + String value = wordRule.getAttributeValue(VALUE_ATR); + if ("tribromide".equals(value) || "tribromid".equals(value) || "triiodide".equals(value) || "triiodid".equals(value)) { + List groups1 = OpsinTools.getDescendantElementsWithTagName(wordRule.getChild(0), GROUP_EL); + List groups2 = 
OpsinTools.getDescendantElementsWithTagName(wordRule.getChild(1), GROUP_EL); + List groups3 = OpsinTools.getDescendantElementsWithTagName(wordRule.getChild(2), GROUP_EL); + if (groups1.size() != 1 || groups2.size() != 1 || groups3.size() != 1) { + throw new RuntimeException("OPSIN Bug: Unexpected trihalide representation"); + } + Atom centralAtom = groups1.get(0).getFrag().getFirstAtom(); + Atom otherAtom1 = groups2.get(0).getFrag().getFirstAtom(); + otherAtom1.setCharge(0); + Atom otherAtom2 = groups3.get(0).getFrag().getFirstAtom(); + otherAtom2.setCharge(0); + state.fragManager.createBond(centralAtom, otherAtom1, 1); + state.fragManager.createBond(centralAtom, otherAtom2, 1); + return true; + } + } + } + return false; + } /** * Multiplies out charged word rules to balance charge * Return true if balancing was possible else false - * @param state * @param negativelyChargedComponents * @param positivelyChargedComponents * @param componentToChargeMapping @@ -1817,7 +2060,7 @@ * @return * @throws StructureBuildingException */ - private boolean multiplyChargedComponents(BuildState state, ListnegativelyChargedComponents,List positivelyChargedComponents,HashMap componentToChargeMapping, int overallCharge) throws StructureBuildingException { + private boolean multiplyChargedComponents(ListnegativelyChargedComponents, List positivelyChargedComponents, Map componentToChargeMapping, int overallCharge) throws StructureBuildingException { Element componentToMultiply; if (overallCharge >0){ if (negativelyChargedComponents.size() >1){ @@ -1839,7 +2082,7 @@ } int timesToDuplicate = Math.abs(overallCharge/charge); for (int i = 0; i < timesToDuplicate; i++) { - XOMTools.insertAfter(componentToMultiply, state.fragManager.cloneElement(state, componentToMultiply)); + OpsinTools.insertAfter(componentToMultiply, state.fragManager.cloneElement(state, componentToMultiply)); } } else{//e.g. 
iron(3+) sulfate -->2:3 mixture @@ -1853,33 +2096,33 @@ int negativeCharge = Math.abs(componentToChargeMapping.get(negativelyChargedComponents.get(0))); int targetTotalAbsoluteCharge = positiveCharge * negativeCharge; for (int i = (targetTotalAbsoluteCharge/negativeCharge); i >1; i--) { - XOMTools.insertAfter(negativelyChargedComponents.get(0), state.fragManager.cloneElement(state, negativelyChargedComponents.get(0))); + OpsinTools.insertAfter(negativelyChargedComponents.get(0), state.fragManager.cloneElement(state, negativelyChargedComponents.get(0))); } for (int i = (targetTotalAbsoluteCharge/positiveCharge); i >1; i--) { - XOMTools.insertAfter(positivelyChargedComponents.get(0), state.fragManager.cloneElement(state, positivelyChargedComponents.get(0))); + OpsinTools.insertAfter(positivelyChargedComponents.get(0), state.fragManager.cloneElement(state, positivelyChargedComponents.get(0))); } } return true; } private boolean componentCanBeMultiplied(Element componentToMultiply) { - if (componentToMultiply.getAttributeValue(WORDRULE_ATR).equals(WordRule.simple.toString()) && XOMTools.getChildElementsWithTagNameAndAttribute(componentToMultiply, WORD_EL, TYPE_ATR, WordType.full.toString()).size()>1){ + if (componentToMultiply.getAttributeValue(WORDRULE_ATR).equals(WordRule.simple.toString()) && OpsinTools.getChildElementsWithTagNameAndAttribute(componentToMultiply, WORD_EL, TYPE_ATR, WordType.full.toString()).size()>1){ return false;//already has been multiplied e.g. dichloride } - Element firstChild = (Element) componentToMultiply.getChild(0); - while (firstChild.getChildElements().size() !=0){ - firstChild = (Element) firstChild.getChild(0); + Element firstChild = componentToMultiply.getChild(0); + while (firstChild.getChildCount() != 0){ + firstChild = firstChild.getChild(0); } - if (firstChild.getLocalName().equals(MULTIPLIER_EL)){//e.g. monochloride. 
Allows specification of explicit stoichiometry + if (firstChild.getName().equals(MULTIPLIER_EL) || firstChild.getName().equals(FRACTIONALMULTIPLIER_EL) ){//e.g. monochloride. Allows specification of explicit stoichiometry return false; } return true; } - private boolean setChargeOnCationicElementAppropriately(BuildState state, int overallCharge, Element cationicElement) { - Atom cation = state.xmlFragmentMap.get(cationicElement).getFirstAtom(); + private boolean setChargeOnCationicElementAppropriately(int overallCharge, Element cationicElement) { + Atom cation = cationicElement.getFrag().getFirstAtom(); int chargeOnCationNeeded = -(overallCharge -cation.getCharge()); - int maximumCharge = Integer.parseInt(MATCH_COLON.split(cationicElement.getAttributeValue(COMMONOXIDATIONSTATESANDMAX_ATR))[1]); + int maximumCharge = Integer.parseInt(cationicElement.getAttributeValue(COMMONOXIDATIONSTATESANDMAX_ATR).split(":")[1]); if (chargeOnCationNeeded >=0 && chargeOnCationNeeded <= maximumCharge){ cation.setCharge(chargeOnCationNeeded); return true; @@ -1888,8 +2131,8 @@ } private Element findRightMostGroupInWordOrWordRule(Element wordOrWordRule) throws StructureBuildingException { - if (wordOrWordRule.getLocalName().equals(WORDRULE_EL)){ - List words = XOMTools.getDescendantElementsWithTagName(wordOrWordRule, WORD_EL); + if (wordOrWordRule.getName().equals(WORDRULE_EL)){ + List words = OpsinTools.getDescendantElementsWithTagName(wordOrWordRule, WORD_EL); for (int i = words.size() -1 ; i >=0; i--) {//ignore functionalTerm Words if (words.get(i).getAttributeValue(TYPE_ATR).equals(WordType.functionalTerm.toString())){ words.remove(words.get(i)); @@ -1900,7 +2143,7 @@ } return StructureBuildingMethods.findRightMostGroupInBracket(words.get(words.size()-1)); } - else if (wordOrWordRule.getLocalName().equals(WORD_EL)){//word element can be treated just like a bracket + else if (wordOrWordRule.getName().equals(WORD_EL)){//word element can be treated just like a bracket return 
StructureBuildingMethods.findRightMostGroupInBracket(wordOrWordRule); } else{ @@ -1912,24 +2155,26 @@ * Nasty special case to cope with oxido and related groups acting as O= or even [O-][N+] * This nasty behaviour is in generated ChemDraw names and is supported by most nameToStructure tools so it is supported here * Acting as O= notably is often correct behaviour for inorganics - * @param state + * + * Methionine (and the like) when substituted at the sulfur/selenium/tellurium are implicitly positively charged * @param groups */ - private void processOxidoSpecialCase(BuildState state, List groups) { + private void processOxidoAndMethionineSpecialCases(List groups) { for (Element group : groups) { - if (OXIDOLIKE_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ - Atom oxidoAtom = state.xmlFragmentMap.get(group).getFirstAtom(); + String subType = group.getAttributeValue(SUBTYPE_ATR); + if (OXIDOLIKE_SUBTYPE_VAL.equals(subType)){ + Atom oxidoAtom = group.getFrag().getFirstAtom(); Atom connectedAtom = oxidoAtom.getAtomNeighbours().get(0); - String element = connectedAtom.getElement(); - if (checkForConnectedOxo(state, connectedAtom)){//e.g. not oxido(trioxo)ruthenium + ChemEl chemEl = connectedAtom.getElement(); + if (checkForConnectedOxo(connectedAtom)){//e.g. 
not oxido(trioxo)ruthenium continue; } if (ELEMENTARYATOM_SUBTYPE_VAL.equals(connectedAtom.getFrag().getSubType()) || - ((element.equals("S") || element.equals("P")) && connectedAtom.getCharge() ==0 && ValencyChecker.checkValencyAvailableForBond(connectedAtom, 1))){ + ((chemEl == ChemEl.S || chemEl == ChemEl.P) && connectedAtom.getCharge() ==0 && ValencyChecker.checkValencyAvailableForBond(connectedAtom, 1))){ oxidoAtom.neutraliseCharge(); oxidoAtom.getFirstBond().setOrder(2); } - else if (element.equals("N") && connectedAtom.getCharge()==0){ + else if (chemEl == ChemEl.N && connectedAtom.getCharge()==0){ int incomingValency = connectedAtom.getIncomingValency(); if ((incomingValency + connectedAtom.getOutValency()) ==3 && connectedAtom.hasSpareValency()){ connectedAtom.addChargeAndProtons(1, 1);//e.g. N-oxidopyridine @@ -1946,6 +2191,14 @@ } } } + else if (AMINOACID_TYPE_VAL.equals(group.getAttributeValue(TYPE_ATR))) { + for (Atom atom : group.getFrag().getAtomList()) { + if (atom.getElement().isChalcogen() && atom.getElement() != ChemEl.O && + atom.getBondCount() == 3 && atom.getIncomingValency() == 3 && atom.getCharge() == 0) { + atom.addChargeAndProtons(1, 1); + } + } + } } } @@ -1954,7 +2207,7 @@ * @param atom * @return */ - private boolean checkForConnectedOxo(BuildState state, Atom atom) { + private boolean checkForConnectedOxo(Atom atom) { List bonds = atom.getBonds(); for (Bond bond : bonds) { Atom connectedAtom; @@ -1964,7 +2217,7 @@ else{ connectedAtom = bond.getFromAtom(); } - Element correspondingEl = state.xmlFragmentMap.getElement(connectedAtom.getFrag()); + Element correspondingEl = connectedAtom.getFrag().getTokenEl(); if (correspondingEl.getValue().equals("oxo")){ return true; } @@ -1975,19 +2228,18 @@ /** * Sets the charge according to the oxidation number if the oxidation number atom property has been set - * @param state * @param groups * @throws StructureBuildingException */ - private void processOxidationNumbers(BuildState state, List groups) 
throws StructureBuildingException { + private void processOxidationNumbers(List groups) throws StructureBuildingException { for (Element group : groups) { if (ELEMENTARYATOM_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ - Atom atom = state.xmlFragmentMap.get(group).getFirstAtom(); + Atom atom = group.getFrag().getFirstAtom(); if (atom.getProperty(Atom.OXIDATION_NUMBER)!=null){ List neighbours = atom.getAtomNeighbours(); int chargeThatWouldFormIfLigandsWereRemoved =0; for (Atom neighbour : neighbours) { - Element neighbourEl = state.xmlFragmentMap.getElement(neighbour.getFrag()); + Element neighbourEl = neighbour.getFrag().getTokenEl(); Bond b = atom.getBondToAtomOrThrow(neighbour); //carbonyl and nitrosyl are neutral ligands if (!((neighbourEl.getValue().equals("carbon") && NONCARBOXYLICACID_TYPE_VAL.equals(neighbourEl.getAttributeValue(TYPE_ATR))) @@ -2005,12 +2257,11 @@ /** * Handles the application of stereochemistry and checking * existing stereochemical specification is still relevant. 
- * @param state * @param molecule * @param uniFrag * @throws StructureBuildingException */ - private void processStereochemistry(BuildState state, Element molecule, Fragment uniFrag) throws StructureBuildingException { + private void processStereochemistry(Element molecule, Fragment uniFrag) throws StructureBuildingException { List stereoChemistryEls = findStereochemistryElsInProcessingOrder(molecule); List atomList = uniFrag.getAtomList(); List atomsWithPreDefinedAtomParity = new ArrayList(); @@ -2042,17 +2293,7 @@ } } StereochemistryHandler stereoChemistryHandler = new StereochemistryHandler(state, atomStereoCentreMap, bondStereoBondMap); - try { - stereoChemistryHandler.applyStereochemicalElements(stereoChemistryEls); - } - catch (StereochemistryException e) { - if (state.n2sConfig.warnRatherThanFailOnUninterpretableStereochemistry()){ - state.addWarningMessage(e.getMessage()); - } - else{ - throw e; - } - } + stereoChemistryHandler.applyStereochemicalElements(stereoChemistryEls); stereoChemistryHandler.removeRedundantStereoCentres(atomsWithPreDefinedAtomParity, bondsWithPreDefinedBondStereo); } } @@ -2060,16 +2301,16 @@ /** * Finds stereochemistry els in a recursive right to left manner. 
* Within the same scope though stereochemistry els are found left to right - * @param molecule + * @param parentEl * @return */ private List findStereochemistryElsInProcessingOrder(Element parentEl) { List matchingElements = new ArrayList(); - Elements children =parentEl.getChildElements(); + List children =parentEl.getChildElements(); List stereochemistryElsAtThisLevel = new ArrayList(); for (int i = children.size()-1; i >=0; i--) { Element child = children.get(i); - if (child.getLocalName().equals(STEREOCHEMISTRY_EL)){ + if (child.getName().equals(STEREOCHEMISTRY_EL)){ stereochemistryElsAtThisLevel.add(child); } else{ @@ -2081,17 +2322,31 @@ return matchingElements; } - private List convertOutAtomsToRgroups(BuildState state, Fragment uniFrag) throws StructureBuildingException { - List rGroups = new ArrayList(); + private void convertOutAtomsToAttachmentAtoms(Fragment uniFrag) throws StructureBuildingException { int outAtomCount = uniFrag.getOutAtomCount(); for (int i = outAtomCount -1; i >=0; i--) { OutAtom outAtom = uniFrag.getOutAtom(i); uniFrag.removeOutAtom(i); - Fragment rGroup =state.fragManager.buildSMILES("[R|" + outAtom.getValency() + "]", "", NONE_LABELS_VAL); - state.fragManager.createBond(outAtom.getAtom(), rGroup.getFirstAtom(), outAtom.getValency()); - state.fragManager.incorporateFragment(rGroup, uniFrag); - rGroups.add(rGroup); + Atom rGroup = state.fragManager.createAtom(ChemEl.R, uniFrag); + state.fragManager.createBond(outAtom.getAtom(), rGroup, outAtom.getValency()); + } + } + + /** + * Returns the atom corresponding to position i in the outAtoms list + * If the outAtom is not set explicitly a suitable atom will be found + * @param buildResults + * @param i index + * @return atom + * @throws StructureBuildingException + */ + private Atom getOutAtomTakingIntoAccountWhetherSetExplicitly(BuildResults buildResults, int i) throws StructureBuildingException { + OutAtom outAtom = buildResults.getOutAtom(i); + if (outAtom.isSetExplicitly()){ + return 
outAtom.getAtom(); + } + else{ + return findAtomForUnlocantedRadical(state, outAtom.getAtom().getFrag(), outAtom); } - return rGroups; } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingException.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingException.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingException.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingException.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,6 +1,6 @@ package uk.ac.cam.ch.wwmm.opsin; -/**Thrown during the construction of the CML molecule. +/**Thrown during assembly of the structure * * @author ptc24 * @@ -11,22 +11,18 @@ StructureBuildingException() { super(); - // TODO Auto-generated constructor stub } StructureBuildingException(String message) { super(message); - // TODO Auto-generated constructor stub } StructureBuildingException(String message, Throwable cause) { super(message, cause); - // TODO Auto-generated constructor stub } StructureBuildingException(Throwable cause) { super(cause); - // TODO Auto-generated constructor stub } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingMethods.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingMethods.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingMethods.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingMethods.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,24 +1,26 @@ package uk.ac.cam.ch.wwmm.opsin; +import java.util.ArrayDeque; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Deque; +import java.util.HashMap; import java.util.HashSet; -import java.util.LinkedList; import java.util.List; +import java.util.Map; +import 
java.util.Map.Entry; import java.util.Set; -import java.util.Stack; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.log4j.Logger; +import uk.ac.cam.ch.wwmm.opsin.IsotopeSpecificationParser.IsotopeSpecification; + import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; - -import nu.xom.Attribute; -import nu.xom.Element; -import nu.xom.Elements; - /** * Methods for processing the substitutive and additive operations that connect all the fragments together * as well as indicated hydrogen/unsaturation/heteroatom replacement @@ -27,7 +29,8 @@ */ class StructureBuildingMethods { private static final Logger LOG = Logger.getLogger(StructureBuildingMethods.class); - private final static Pattern matchCompoundLocant =Pattern.compile("[\\[\\(\\{](\\d+[a-z]?'*)[\\]\\)\\}]"); + private static final Pattern matchCompoundLocant =Pattern.compile("[\\[\\(\\{](\\d+[a-z]?'*)[\\]\\)\\}]"); + private StructureBuildingMethods() {} /** @@ -44,26 +47,26 @@ * @throws StructureBuildingException */ static void resolveWordOrBracket(BuildState state, Element word) throws StructureBuildingException { - if (word.getLocalName().equals(WORDRULE_EL)){//already been resolved + if (word.getName().equals(WORDRULE_EL)){//already been resolved return; } - if (!word.getLocalName().equals(WORD_EL) && !word.getLocalName().equals(BRACKET_EL)){ + if (!word.getName().equals(WORD_EL) && !word.getName().equals(BRACKET_EL)){ throw new StructureBuildingException("A word or bracket is the expected input"); } recursivelyResolveLocantedFeatures(state, word); recursivelyResolveUnLocantedFeatures(state, word); //TODO check all things that can substitute have outAtoms //TOOD think whether you can avoid the need to have a cansubstitute function by only using appropriate group - List subsBracketsAndRoots = XOMTools.getDescendantElementsWithTagNames(word, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); - for (Element 
subsBracketsAndRoot : subsBracketsAndRoots) { - if (subsBracketsAndRoot.getAttribute(MULTIPLIER_ATR) != null) { - throw new StructureBuildingException("Structure building problem: multiplier on :" + subsBracketsAndRoot.getLocalName() + " was never used"); - } - } - List groups = XOMTools.getDescendantElementsWithTagName(word, GROUP_EL); + List subsBracketsAndRoots = OpsinTools.getDescendantElementsWithTagNames(word, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); + for (Element subsBracketsAndRoot : subsBracketsAndRoots) { + if (subsBracketsAndRoot.getAttribute(MULTIPLIER_ATR) != null) { + throw new StructureBuildingException("Structure building problem: multiplier on :" + subsBracketsAndRoot.getName() + " was never used"); + } + } + List groups = OpsinTools.getDescendantElementsWithTagName(word, GROUP_EL); for (int i = 0; i < groups.size(); i++) { Element group = groups.get(i); - if (group.getAttribute(RESOLVED_ATR)==null && i!=groups.size()-1){ + if (group.getAttribute(RESOLVED_ATR)==null && i != groups.size()-1){ throw new StructureBuildingException("Structure building problem: Bond was not made from :" +group.getValue() + " but one should of been"); } } @@ -79,15 +82,15 @@ * @throws StructureBuildingException */ static void recursivelyResolveLocantedFeatures(BuildState state, Element word) throws StructureBuildingException { - if (!word.getLocalName().equals(WORD_EL) && !word.getLocalName().equals(BRACKET_EL)){ + if (!word.getName().equals(WORD_EL) && !word.getName().equals(BRACKET_EL)){ throw new StructureBuildingException("A word or bracket is the expected input"); } - List subsBracketsAndRoots = XOMTools.getChildElementsWithTagNames(word, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); + List subsBracketsAndRoots = OpsinTools.getChildElementsWithTagNames(word, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); //substitution occurs left to right so by doing this right to left you ensure that any groups that will come into existence //due to 
multipliers being expanded will be in existence for (int i =subsBracketsAndRoots.size()-1; i>=0; i--) { Element subBracketOrRoot = subsBracketsAndRoots.get(i); - if (subBracketOrRoot.getLocalName().equals(BRACKET_EL)){ + if (subBracketOrRoot.getName().equals(BRACKET_EL)){ recursivelyResolveLocantedFeatures(state,subBracketOrRoot); if (potentiallyCanSubstitute(subBracketOrRoot)){ performAdditiveOperations(state, subBracketOrRoot); @@ -110,15 +113,15 @@ * @throws StructureBuildingException */ static void recursivelyResolveUnLocantedFeatures(BuildState state, Element word) throws StructureBuildingException { - if (!word.getLocalName().equals(WORD_EL) && !word.getLocalName().equals(BRACKET_EL)){ + if (!word.getName().equals(WORD_EL) && !word.getName().equals(BRACKET_EL)){ throw new StructureBuildingException("A word or bracket is the expected input"); } - List subsBracketsAndRoots = XOMTools.getChildElementsWithTagNames(word, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); + List subsBracketsAndRoots = OpsinTools.getChildElementsWithTagNames(word, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); //substitution occurs left to right so by doing this right to left you ensure that any groups that will come into existence //due to multipliers being expanded will be in existence for (int i =subsBracketsAndRoots.size()-1; i>=0; i--) { Element subBracketOrRoot = subsBracketsAndRoots.get(i); - if (subBracketOrRoot.getLocalName().equals(BRACKET_EL)){ + if (subBracketOrRoot.getName().equals(BRACKET_EL)){ recursivelyResolveUnLocantedFeatures(state,subBracketOrRoot); if (potentiallyCanSubstitute(subBracketOrRoot)){ performUnLocantedSubstitutiveOperations(state, subBracketOrRoot); @@ -156,31 +159,31 @@ private static void performLocantedSubstitutiveOperations(BuildState state, Element subBracketOrRoot) throws StructureBuildingException { Element group; - if (subBracketOrRoot.getLocalName().equals(BRACKET_EL)){ - group =findRightMostGroupInBracket(subBracketOrRoot); + if 
(subBracketOrRoot.getName().equals(BRACKET_EL)) { + group = findRightMostGroupInBracket(subBracketOrRoot); } else{ - group =subBracketOrRoot.getFirstChildElement(GROUP_EL); + group = subBracketOrRoot.getFirstChildElement(GROUP_EL); } - if (group.getAttribute(RESOLVED_ATR)!=null){ + if (group.getAttribute(RESOLVED_ATR) != null) { return; } - Fragment frag = state.xmlFragmentMap.get(group); - if (frag.getOutAtomCount() >=1 && subBracketOrRoot.getAttribute(LOCANT_ATR)!=null){ + Fragment frag = group.getFrag(); + if (frag.getOutAtomCount() >=1 && subBracketOrRoot.getAttribute(LOCANT_ATR) != null){ String locantString = subBracketOrRoot.getAttributeValue(LOCANT_ATR); if (frag.getOutAtomCount() >1){ checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(frag, group); } - if (subBracketOrRoot.getAttribute(MULTIPLIER_ATR)!=null){//e.g. 1,2-diethyl + if (subBracketOrRoot.getAttribute(MULTIPLIER_ATR) != null) {//e.g. 1,2-diethyl multiplyOutAndSubstitute(state, subBracketOrRoot); } else{ - Fragment parentFrag = findFragmentWithLocant(state, subBracketOrRoot, locantString); - if (parentFrag==null){ + Fragment parentFrag = findFragmentWithLocant(subBracketOrRoot, locantString); + if (parentFrag == null){ String modifiedLocant = checkForBracketedPrimedLocantSpecialCase(subBracketOrRoot, locantString); if (modifiedLocant != null){ - parentFrag = findFragmentWithLocant(state, subBracketOrRoot, modifiedLocant); - if (parentFrag !=null){ + parentFrag = findFragmentWithLocant(subBracketOrRoot, modifiedLocant); + if (parentFrag != null){ locantString = modifiedLocant; } } @@ -189,19 +192,24 @@ throw new StructureBuildingException("Cannot find in scope fragment with atom with locant " + locantString + "."); } group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); - Element groupToAttachTo = state.xmlFragmentMap.getElement(parentFrag); - if (groupToAttachTo.getAttribute(ACCEPTSADDITIVEBONDS_ATR)!=null && parentFrag.getOutAtomCount()>0 && 
groupToAttachTo.getAttribute(ISAMULTIRADICAL_ATR)!=null - && parentFrag.getAtomByLocantOrThrow(locantString).getOutValency()>0 && frag.getOutAtom(0).getValency()==1){ - //horrible special case to allow C-methylcarbonimidoyl + Element groupToAttachTo = parentFrag.getTokenEl(); + if (groupToAttachTo.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && + parentFrag.getOutAtomCount() > 0 && + groupToAttachTo.getAttribute(ISAMULTIRADICAL_ATR) != null && + parentFrag.getAtomByLocantOrThrow(locantString).getOutValency() > 0 && + frag.getOutAtom(0).getValency() == 1 && + parentFrag.getFirstAtom().equals(parentFrag.getAtomByLocantOrThrow(locantString))) { + //horrible special case to allow C-hydroxycarbonimidoyl and the like + //If additive nomenclature the first atom should be an out atom joinFragmentsAdditively(state, frag, parentFrag); } else{ Atom atomToSubstituteAt = parentFrag.getAtomByLocantOrThrow(locantString); if (PHOSPHO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && frag.getOutAtom(0).getValency() == 1){ - if (!atomToSubstituteAt.getElement().equals("O")){ + if (atomToSubstituteAt.getElement() != ChemEl.O){ for (Atom neighbour : atomToSubstituteAt.getAtomNeighbours()) { - if (neighbour.getElement().equals("O") && - neighbour.getBonds().size()==1 && + if (neighbour.getElement() == ChemEl.O && + neighbour.getBondCount()==1 && neighbour.getFirstBond().getOrder() == 1 && neighbour.getOutValency() == 0 && neighbour.getCharge() == 0){ @@ -219,49 +227,52 @@ private static void performUnLocantedSubstitutiveOperations(BuildState state, Element subBracketOrRoot) throws StructureBuildingException { Element group; - if (subBracketOrRoot.getLocalName().equals(BRACKET_EL)){ - group =findRightMostGroupInBracket(subBracketOrRoot); + if (subBracketOrRoot.getName().equals(BRACKET_EL)){ + group = findRightMostGroupInBracket(subBracketOrRoot); } else{ - group =subBracketOrRoot.getFirstChildElement(GROUP_EL); + group = subBracketOrRoot.getFirstChildElement(GROUP_EL); } - 
if (group.getAttribute(RESOLVED_ATR)!=null){ + if (group.getAttribute(RESOLVED_ATR) != null){ return; } - Fragment frag = state.xmlFragmentMap.get(group); - if (frag.getOutAtomCount() >=1){ - if (subBracketOrRoot.getAttribute(LOCANT_ATR)!=null){ - throw new StructureBuildingException("Substituent has an unused outAtom and has a locant but locanted susbtitution should already been been performed!"); + Fragment frag = group.getFrag(); + if (frag.getOutAtomCount() >= 1){ + if (subBracketOrRoot.getAttribute(LOCANT_ATR) != null){ + throw new RuntimeException("Substituent has an unused outAtom and has a locant but locanted substitution should already have been performed!"); } if (frag.getOutAtomCount() > 1){ checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(frag, group); } - if (subBracketOrRoot.getAttribute(MULTIPLIER_ATR)!=null){//e.g. diethyl + if (subBracketOrRoot.getAttribute(MULTIPLIER_ATR) != null) {//e.g. diethyl multiplyOutAndSubstitute(state, subBracketOrRoot); } else{ - if (PERHALOGENO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ + if (PERHALOGENO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))) { performPerHalogenoSubstitution(state, frag, subBracketOrRoot); } else{ - Atom atomToJoinTo = null; + List atomsToJoinTo = null; if (PHOSPHO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && frag.getOutAtom(0).getValency() == 1){ - List possibleParents =findAlternativeFragments(state, subBracketOrRoot); + List possibleParents = findAlternativeFragments(subBracketOrRoot); for (Fragment fragment : possibleParents) { List hydroxyAtoms = FragmentTools.findHydroxyGroups(fragment); - if (hydroxyAtoms.size() >0){ - atomToJoinTo =hydroxyAtoms.get(0); + if (hydroxyAtoms.size() >= 1){ + atomsToJoinTo = hydroxyAtoms; } break; } } - if (atomToJoinTo ==null) { - atomToJoinTo = findAtomForSubstitution(state, subBracketOrRoot, frag.getOutAtom(0).getValency()); + if (atomsToJoinTo == null) { + atomsToJoinTo = 
findAtomsForSubstitution(subBracketOrRoot, 1, frag.getOutAtom(0).getValency()); } - if (atomToJoinTo ==null){ + if (atomsToJoinTo == null){ throw new StructureBuildingException("Unlocanted substitution failed: unable to find suitable atom to bond atom with id:" + frag.getOutAtom(0).getAtom().getID() + " to!"); } - joinFragmentsSubstitutively(state, frag, atomToJoinTo); + if (AmbiguityChecker.isSubstitutionAmbiguous(atomsToJoinTo, 1)) { + state.addIsAmbiguous("Connection of " + group.getValue() + " to " + atomsToJoinTo.get(0).getFrag().getTokenEl().getValue()); + } + joinFragmentsSubstitutively(state, frag, atomsToJoinTo.get(0)); } group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); } @@ -277,13 +288,13 @@ * @throws StructureBuildingException */ private static void performPerHalogenoSubstitution(BuildState state, Fragment perhalogenFrag, Element subBracketOrRoot) throws StructureBuildingException { - List fragmentsToAttachTo = findAlternativeFragments(state, subBracketOrRoot); + List fragmentsToAttachTo = findAlternativeFragments(subBracketOrRoot); List atomsToHalogenate = new ArrayList(); for (Fragment fragment : fragmentsToAttachTo) { FragmentTools.convertSpareValenciesToDoubleBonds(fragment); for (Atom atom : fragment.getAtomList()) { int substitutableHydrogen = calculateSubstitutableHydrogenAtoms(atom); - if (substitutableHydrogen > 0 && FragmentTools.isCharacteristicAtom(atom)){ + if (substitutableHydrogen > 0 && FragmentTools.isCharacteristicAtom(atom)){ continue; } for (int i = 0; i < substitutableHydrogen; i++) { @@ -291,13 +302,16 @@ } } } + if (atomsToHalogenate.size() == 0){ + throw new RuntimeException("Failed to find any substitutable hydrogen to apply " + perhalogenFrag.getTokenEl().getValue() + " to!"); + } List halogens = new ArrayList(); halogens.add(perhalogenFrag); - for (int i = 0; i < atomsToHalogenate.size()-1; i++) { + for (int i = 0; i < atomsToHalogenate.size() - 1; i++) { halogens.add(state.fragManager.copyFragment(perhalogenFrag)); } 
for (int i = 0; i < atomsToHalogenate.size(); i++) { - Fragment halogen =halogens.get(i); + Fragment halogen = halogens.get(i); Atom from = halogen.getOutAtom(0).getAtom(); halogen.removeOutAtom(0); state.fragManager.createBond(from, atomsToHalogenate.get(i), 1); @@ -318,32 +332,34 @@ * @throws StructureBuildingException */ private static void multiplyOutAndSubstitute(BuildState state, Element subOrBracket) throws StructureBuildingException { - int multiplier = Integer.parseInt(subOrBracket.getAttributeValue(MULTIPLIER_ATR)); - subOrBracket.removeAttribute(subOrBracket.getAttribute(MULTIPLIER_ATR)); - String[] locants =null; - if (subOrBracket.getAttribute(LOCANT_ATR) !=null){ - locants = MATCH_COMMA.split(subOrBracket.getAttributeValue(LOCANT_ATR)); + Attribute multiplierAtr = subOrBracket.getAttribute(MULTIPLIER_ATR); + int multiplier = Integer.parseInt(multiplierAtr.getValue()); + subOrBracket.removeAttribute(multiplierAtr); + String[] locants = null; + String locantsAtrValue = subOrBracket.getAttributeValue(LOCANT_ATR); + if (locantsAtrValue != null){ + locants = locantsAtrValue.split(","); } - Element parentWordOrBracket =(Element) subOrBracket.getParent(); + Element parentWordOrBracket = subOrBracket.getParent(); int indexOfSubOrBracket = parentWordOrBracket.indexOf(subOrBracket); subOrBracket.detach(); List elementsNotToBeMultiplied = new ArrayList();//anything before the multiplier in the sub/bracket Element multiplierEl = subOrBracket.getFirstChildElement(MULTIPLIER_EL); - if (multiplierEl ==null){ - throw new StructureBuildingException("Multiplier not found where multiplier expected"); + if (multiplierEl == null){ + throw new RuntimeException("Multiplier not found where multiplier expected"); } - for (int j = subOrBracket.indexOf(multiplierEl) -1 ; j >=0 ; j--) { - Element el = (Element) subOrBracket.getChild(j); + for (int i = subOrBracket.indexOf(multiplierEl) -1 ; i >=0 ; i--) { + Element el = subOrBracket.getChild(i); el.detach(); 
elementsNotToBeMultiplied.add(el); } multiplierEl.detach(); List multipliedElements = new ArrayList(); - for (int i = multiplier -1; i >=0; i--) { + for (int i = multiplier - 1; i >=0; i--) { Element currentElement; - if (i!=0){ + if (i != 0){ currentElement = state.fragManager.cloneElement(state, subOrBracket, i); addPrimesToLocantedStereochemistryElements(currentElement, StringTools.multiplyString("'", i));//Stereochemistry elements with locants will need to have their locants primed (stereochemistry is only processed after structure building) } @@ -351,15 +367,17 @@ currentElement = subOrBracket; } multipliedElements.add(currentElement); - parentWordOrBracket.insertChild(currentElement, indexOfSubOrBracket); - if (locants !=null){ + if (locants != null){ + parentWordOrBracket.insertChild(currentElement, indexOfSubOrBracket); currentElement.getAttribute(LOCANT_ATR).setValue(locants[i]); performLocantedSubstitutiveOperations(state, currentElement); + currentElement.detach(); } - else{ - performUnLocantedSubstitutiveOperations(state, currentElement); - } - currentElement.detach(); + } + if (locants == null) { + parentWordOrBracket.insertChild(multipliedElements.get(0), indexOfSubOrBracket); + performUnlocantedSubstitutiveOperations(state, multipliedElements); + multipliedElements.get(0).detach(); } for (Element multipliedElement : multipliedElements) {//attach all the multiplied subs/brackets parentWordOrBracket.insertChild(multipliedElement, indexOfSubOrBracket); @@ -369,6 +387,74 @@ } } + private static void performUnlocantedSubstitutiveOperations(BuildState state, List multipliedElements) throws StructureBuildingException { + int numOfSubstituents = multipliedElements.size(); + Element subBracketOrRoot = multipliedElements.get(0); + Element group; + if (subBracketOrRoot.getName().equals(BRACKET_EL)){ + group = findRightMostGroupInBracket(subBracketOrRoot); + } + else{ + group = subBracketOrRoot.getFirstChildElement(GROUP_EL); + } + Fragment frag = 
group.getFrag(); + if (frag.getOutAtomCount() >= 1){ + if (subBracketOrRoot.getAttribute(LOCANT_ATR) != null){ + throw new RuntimeException("Substituent has an unused outAtom and has a locant but locanted substitution should already been been performed!"); + } + if (PERHALOGENO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))) { + throw new StructureBuildingException(group.getValue() + " cannot be multiplied"); + } + if (frag.getOutAtomCount() > 1){ + checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(frag, group); + } + List atomsToJoinTo = null; + if (PHOSPHO_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR)) && frag.getOutAtom(0).getValency() == 1){ + List possibleParents = findAlternativeFragments(subBracketOrRoot); + for (Fragment fragment : possibleParents) { + List hydroxyAtoms = FragmentTools.findHydroxyGroups(fragment); + if (hydroxyAtoms.size() >= numOfSubstituents){ + atomsToJoinTo = hydroxyAtoms; + } + break; + } + } + if (atomsToJoinTo == null) { + atomsToJoinTo = findAtomsForSubstitution(subBracketOrRoot, numOfSubstituents, frag.getOutAtom(0).getValency()); + } + if (atomsToJoinTo == null) { + throw new StructureBuildingException("Unlocanted substitution failed: unable to find suitable atom to bond atom with id:" + frag.getOutAtom(0).getAtom().getID() + " to!"); + } + if (AmbiguityChecker.isSubstitutionAmbiguous(atomsToJoinTo, numOfSubstituents)) { + state.addIsAmbiguous("Connection of " + group.getValue() + " to " + atomsToJoinTo.get(0).getFrag().getTokenEl().getValue()); + List atomsPreferredByEnvironment = AmbiguityChecker.useAtomEnvironmentsToGivePlausibleSubstitution(atomsToJoinTo, numOfSubstituents); + if (atomsPreferredByEnvironment != null) { + atomsToJoinTo = atomsPreferredByEnvironment; + } + } + + joinFragmentsSubstitutively(state, frag, atomsToJoinTo.get(0)); + group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); + + for (int i = 1; i < numOfSubstituents; i++) { + subBracketOrRoot = multipliedElements.get(i); + if 
(subBracketOrRoot.getName().equals(BRACKET_EL)){ + group = findRightMostGroupInBracket(subBracketOrRoot); + } + else{ + group = subBracketOrRoot.getFirstChildElement(GROUP_EL); + } + frag = group.getFrag(); + if (frag.getOutAtomCount() > 1){//TODO do this prior to multiplication? + checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(frag, group); + } + + joinFragmentsSubstitutively(state, frag, atomsToJoinTo.get(i)); + group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); + } + } + } + /** * Adds locanted unsaturators, heteroatoms and hydrogen elements to the group within the sub or root * @param state @@ -376,22 +462,22 @@ * @throws StructureBuildingException */ static void resolveLocantedFeatures(BuildState state, Element subOrRoot) throws StructureBuildingException { - Elements groups = subOrRoot.getChildElements(GROUP_EL); - if (groups.size()!=1){ + List groups = subOrRoot.getChildElements(GROUP_EL); + if (groups.size() != 1){ throw new StructureBuildingException("Each sub or root should only have one group element. 
This indicates a bug in OPSIN"); } Element group = groups.get(0); - Fragment thisFrag = state.xmlFragmentMap.get(group); + Fragment thisFrag = group.getFrag(); - ArrayList unsaturators = new ArrayList(); - ArrayList heteroatoms = new ArrayList(); - ArrayList hydrogenElements = new ArrayList(); - ArrayList subtractivePrefixElements = new ArrayList(); - - Elements children =subOrRoot.getChildElements(); - for (int i = 0; i < children.size(); i++) { - Element currentEl =children.get(i); - String elName =currentEl.getLocalName(); + List unsaturators = new ArrayList(); + List heteroatoms = new ArrayList(); + List hydrogenElements = new ArrayList(); + List subtractivePrefixElements = new ArrayList(); + List isotopeSpecifications = new ArrayList(); + + List children =subOrRoot.getChildElements(); + for (Element currentEl : children) { + String elName =currentEl.getName(); if (elName.equals(UNSATURATOR_EL)){ unsaturators.add(currentEl); } @@ -410,28 +496,38 @@ else if (elName.equals(ADDEDHYDROGEN_EL)){ hydrogenElements.add(currentEl); } + else if (elName.equals(ISOTOPESPECIFICATION_EL)){ + isotopeSpecifications.add(currentEl); + } } /* * Add locanted functionality */ List atomsToDehydro = new ArrayList(); + //locanted substitution can be assumed to be irrelevant to subtractive operations hence perform all subtractive operations now + Map unlocantedSubtractivePrefixes = new HashMap(); for(int i = subtractivePrefixElements.size() -1; i >= 0; i--) { Element subtractivePrefix = subtractivePrefixElements.get(i); String type = subtractivePrefix.getAttributeValue(TYPE_ATR); if (type.equals(DEOXY_TYPE_VAL)){ String locant = subtractivePrefix.getAttributeValue(LOCANT_ATR); - String element = subtractivePrefix.getAttributeValue(VALUE_ATR); - //locant can be null but locanted substitution can be assumed to be irrelevant to subtractive operations hence perform all subtractive operations now - FragmentTools.removeHydroxyLikeTerminalAtom(state, thisFrag, element, locant); + ChemEl 
chemEl = ChemEl.valueOf(subtractivePrefix.getAttributeValue(VALUE_ATR)); + if (locant == null) { + Integer count = unlocantedSubtractivePrefixes.get(chemEl); + unlocantedSubtractivePrefixes.put(chemEl, count != null ? count + 1 : 1); + } + else { + applySubtractivePrefix(state, thisFrag, chemEl, locant); + } } else if (type.equals(ANHYDRO_TYPE_VAL)){ applyAnhydroPrefix(state, thisFrag, subtractivePrefix); } else if (type.equals(DEHYDRO_TYPE_VAL)){ String locant = subtractivePrefix.getAttributeValue(LOCANT_ATR); - if(locant!=null) { + if(locant != null) { atomsToDehydro.add(thisFrag.getAtomByLocantOrThrow(locant)); } else{ @@ -443,6 +539,9 @@ } subtractivePrefix.detach(); } + for (Entry entry : unlocantedSubtractivePrefixes.entrySet()) { + applyUnlocantedSubtractivePrefixes(state, thisFrag, entry.getKey(), entry.getValue()); + } if (atomsToDehydro.size() > 0){ boolean isCarbohydrateDehydro = false; @@ -454,7 +553,7 @@ } if (isCarbohydrateDehydro){ for (Atom a : atomsToDehydro) { - List hydroxyAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(a.getAtomNeighbours(), "O"); + List hydroxyAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(a.getAtomNeighbours(), ChemEl.O); if (hydroxyAtoms.size() > 0){ hydroxyAtoms.get(0).getFirstBond().setOrder(2); } @@ -496,13 +595,15 @@ for(int i=hydrogenElements.size() -1;i >= 0;i--) { Element hydrogen = hydrogenElements.get(i); String locant = hydrogen.getAttributeValue(LOCANT_ATR); - if(locant!=null) { + if(locant != null) { Atom a =thisFrag.getAtomByLocantOrThrow(locant); if (a.hasSpareValency()){ a.setSpareValency(false); } else{ - throw new StructureBuildingException("hydrogen addition at locant: " + locant +" was requested, but this atom is not unsaturated"); + if (!acdNameSpiroIndicatedHydrogenBug(group, locant)){ + throw new StructureBuildingException("hydrogen addition at locant: " + locant +" was requested, but this atom is not unsaturated"); + } } hydrogenElements.remove(i); hydrogen.detach(); @@ -517,21 +618,21 @@ 
unsaturator.detach(); continue; } - if(locant!=null) { + if(locant != null) { unsaturators.remove(unsaturator); /* * Is the locant a compound locant e.g. 1(6) * This would indicate unsaturation between the atoms with locants 1 and 6 */ - Matcher matcher = matchCompoundLocant.matcher(locant); - if (matcher.find()) { - String compoundLocant = matcher.group(1); - locant = matcher.replaceAll(""); - FragmentTools.unsaturate(thisFrag.getAtomByLocantOrThrow(locant), compoundLocant, bondOrder, thisFrag); - } - else{ - FragmentTools.unsaturate(thisFrag.getAtomByLocantOrThrow(locant), bondOrder, thisFrag); - } + Matcher matcher = matchCompoundLocant.matcher(locant); + if (matcher.find()) { + String compoundLocant = matcher.group(1); + locant = matcher.replaceAll(""); + FragmentTools.unsaturate(thisFrag.getAtomByLocantOrThrow(locant), compoundLocant, bondOrder, thisFrag); + } + else { + FragmentTools.unsaturate(thisFrag.getAtomByLocantOrThrow(locant), bondOrder, thisFrag); + } unsaturator.detach(); } } @@ -539,36 +640,127 @@ for(int i=heteroatoms.size() -1;i >= 0;i--) { Element heteroatomEl = heteroatoms.get(i); String locant = heteroatomEl.getAttributeValue(LOCANT_ATR); - if(locant!=null) { + if(locant != null) { Atom heteroatom = state.fragManager.getHeteroatom(heteroatomEl.getAttributeValue(VALUE_ATR)); Atom atomToBeReplaced =thisFrag.getAtomByLocantOrThrow(locant); - if (heteroatom.getElement().equals(atomToBeReplaced.getElement()) && heteroatom.getCharge() == atomToBeReplaced.getCharge()){ + if (heteroatom.getElement() == atomToBeReplaced.getElement() && heteroatom.getCharge() == atomToBeReplaced.getCharge()){ throw new StructureBuildingException("The replacement term " +heteroatomEl.getValue() +" was used on an atom that already is a " + heteroatom.getElement()); } state.fragManager.replaceAtomWithAtom(thisFrag.getAtomByLocantOrThrow(locant), heteroatom, true); - if (heteroatomEl.getAttribute(LAMBDA_ATR)!=null){ + if (heteroatomEl.getAttribute(LAMBDA_ATR) != null){ 
thisFrag.getAtomByLocantOrThrow(locant).setLambdaConventionValency(Integer.parseInt(heteroatomEl.getAttributeValue(LAMBDA_ATR))); } heteroatoms.remove(heteroatomEl); heteroatomEl.detach(); } } + + if (isotopeSpecifications.size() > 0) { + applyIsotopeSpecifications(state, thisFrag, isotopeSpecifications, true); + } + } + + /** + * ACD/Name has a known bug where it produces names in which a suffixed saturated ring in a polycyclic spiro + * is treated as if it is unsaturated and hence has indicated hydrogens + * e.g. 1',3'-dihydro-2H,5H-spiro[imidazolidine-4,2'-indene]-2,5-dione + * @param group + * @param indicatedHydrogenLocant + * @return + */ + private static boolean acdNameSpiroIndicatedHydrogenBug(Element group, String indicatedHydrogenLocant) { + if (group.getValue().startsWith("spiro")) { + for (Element suffix : group.getParent().getChildElements(SUFFIX_EL)) { + String suffixLocant = suffix.getAttributeValue(LOCANT_ATR); + if (suffixLocant != null && suffixLocant.equals(indicatedHydrogenLocant)) { + LOG.debug("Indicated hydrogen at " + indicatedHydrogenLocant + " ignored. Known bug in generated IUPAC name"); + return true; + } + } + } + return false; + } + + /** + * Removes a terminal atom of a particular element e.g. 
oxygen + * The locant specifies the atom adjacent to the atom to be removed + * Formally the atom is replaced by hydrogen, hence stereochemistry is intentionally preserved + * @param state + * @param fragment + * @param chemEl + * @param locant A locant or null + * @throws StructureBuildingException + */ + static void applySubtractivePrefix(BuildState state, Fragment fragment, ChemEl chemEl, String locant) throws StructureBuildingException { + Atom adjacentAtom = fragment.getAtomByLocantOrThrow(locant); + List applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(adjacentAtom.getAtomNeighbours(), chemEl); + if (applicableTerminalAtoms.isEmpty()) { + throw new StructureBuildingException("Unable to find terminal atom of type: " + chemEl + " at locant "+ locant +" for subtractive nomenclature"); + } + Atom atomToRemove = applicableTerminalAtoms.get(0); + if (FragmentTools.isFunctionalAtom(atomToRemove)) {//This can occur with aminoglycosides where the anomeric OH is removed by deoxy + for (int i = 0, len = fragment.getFunctionalAtomCount(); i < len; i++) { + if (atomToRemove.equals(fragment.getFunctionalAtom(i).getAtom())) { + fragment.removeFunctionalAtom(i); + break; + } + } + fragment.addFunctionalAtom(atomToRemove.getFirstBond().getOtherAtom(atomToRemove)); + } + FragmentTools.removeTerminalAtom(state, atomToRemove); + } + + /** + * Removes terminal atoms of a particular element e.g. 
oxygen + * The number to remove is decided by the count + * Formally the atom is replaced by hydrogen, hence stereochemistry is intentionally preserved + * @param state + * @param fragment + * @param chemEl + * @param count + * @throws StructureBuildingException + */ + static void applyUnlocantedSubtractivePrefixes(BuildState state, Fragment fragment, ChemEl chemEl, int count) throws StructureBuildingException { + List applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(fragment.getAtomList(), chemEl); + if (applicableTerminalAtoms.isEmpty() || applicableTerminalAtoms.size() < count) { + throw new StructureBuildingException("Unable to find terminal atom of type: " + chemEl + " for subtractive nomenclature"); + } + if (AmbiguityChecker.isSubstitutionAmbiguous(applicableTerminalAtoms, count)) { + state.addIsAmbiguous("Group to remove with subtractive prefix"); + } + for (int i = 0; i < count; i++) { + Atom atomToRemove = applicableTerminalAtoms.get(i); + if (FragmentTools.isFunctionalAtom(atomToRemove)) {//This can occur with aminoglycosides where the anomeric OH is removed by deoxy + for (int j = 0, len = fragment.getFunctionalAtomCount(); j < len; j++) { + if (atomToRemove.equals(fragment.getFunctionalAtom(j).getAtom())) { + fragment.removeFunctionalAtom(j); + break; + } + } + fragment.addFunctionalAtom(atomToRemove.getFirstBond().getOtherAtom(atomToRemove)); + } + FragmentTools.removeTerminalAtom(state, atomToRemove); + } } private static void applyAnhydroPrefix(BuildState state, Fragment frag, Element subtractivePrefix) throws StructureBuildingException { - String element = subtractivePrefix.getAttributeValue(VALUE_ATR); - String[] locants = MATCH_COMMA.split(subtractivePrefix.getAttributeValue(LOCANT_ATR)); + ChemEl chemEl = ChemEl.valueOf(subtractivePrefix.getAttributeValue(VALUE_ATR)); + String locantStr = subtractivePrefix.getAttributeValue(LOCANT_ATR); + if (locantStr == null) { + throw new StructureBuildingException("Two locants are 
required before an anhydro prefix"); + } + String[] locants = locantStr.split(","); Atom backBoneAtom1 = frag.getAtomByLocantOrThrow(locants[0]); Atom backBoneAtom2 = frag.getAtomByLocantOrThrow(locants[1]); - List applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(backBoneAtom1.getAtomNeighbours(), element); + List applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(backBoneAtom1.getAtomNeighbours(), chemEl); if (applicableTerminalAtoms.isEmpty()){ - throw new StructureBuildingException("Unable to find terminal atom of type: " + element + " for subtractive nomenclature"); + throw new StructureBuildingException("Unable to find terminal atom of type: " + chemEl + " for subtractive nomenclature"); } FragmentTools.removeTerminalAtom(state, applicableTerminalAtoms.get(0)); - applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(backBoneAtom2.getAtomNeighbours(), element); + applicableTerminalAtoms = FragmentTools.findHydroxyLikeTerminalAtoms(backBoneAtom2.getAtomNeighbours(), chemEl); if (applicableTerminalAtoms.isEmpty()){ - throw new StructureBuildingException("Unable to find terminal atom of type: " + element + " for subtractive nomenclature"); + throw new StructureBuildingException("Unable to find terminal atom of type: " + chemEl + " for subtractive nomenclature"); } state.fragManager.createBond(backBoneAtom1, applicableTerminalAtoms.get(0), 1); } @@ -586,7 +778,7 @@ throw new StructureBuildingException("locants specified for dehydro specify the same atom too many times"); } atomLoop: for (int i = atomsToFormTripleBondsBetween.size()-1; i >=0; i = i-2) {//two atoms will have a triple bond formed betwen them - Atom a = atomsToFormTripleBondsBetween.get(i); + Atom a = atomsToFormTripleBondsBetween.get(i); List neighbours = a.getAtomNeighbours(); for (Atom neighbour : neighbours) { if (atomsToFormTripleBondsBetween.contains(neighbour)){ @@ -611,249 +803,597 @@ * @throws StructureBuildingException */ static void 
resolveUnLocantedFeatures(BuildState state, Element subOrRoot) throws StructureBuildingException { - Elements groups = subOrRoot.getChildElements(GROUP_EL); - if (groups.size()!=1){ + List groups = subOrRoot.getChildElements(GROUP_EL); + if (groups.size() != 1){ throw new StructureBuildingException("Each sub or root should only have one group element. This indicates a bug in OPSIN"); } - Element group = groups.get(0); - Fragment thisFrag = state.xmlFragmentMap.get(group); - List atomList =thisFrag.getAtomList(); + Fragment frag = groups.get(0).getFrag(); - ArrayList unsaturators = new ArrayList(); - ArrayList heteroatoms = new ArrayList(); - ArrayList hydrogenElements = new ArrayList(); - - Elements children =subOrRoot.getChildElements(); - for (int i = 0; i < children.size(); i++) { - Element currentEl =children.get(i); - String elName =currentEl.getLocalName(); - if (elName.equals(UNSATURATOR_EL)){ - unsaturators.add(currentEl); + List unsaturationBondOrders = new ArrayList(); + List heteroatoms = new ArrayList(); + List hydrogenElements = new ArrayList(); + List isotopeSpecifications = new ArrayList(); + + List children = subOrRoot.getChildElements(); + for (Element currentEl : children) { + String elName = currentEl.getName(); + if (elName.equals(UNSATURATOR_EL)) { + int bondOrder = Integer.parseInt(currentEl.getAttributeValue(VALUE_ATR)); + if (bondOrder > 1) { + unsaturationBondOrders.add(bondOrder); + } + currentEl.detach(); } else if (elName.equals(HETEROATOM_EL)){ heteroatoms.add(currentEl); + currentEl.detach(); } - else if (elName.equals(HYDRO_EL)){ + else if (elName.equals(HYDRO_EL) || + elName.equals(INDICATEDHYDROGEN_EL) || + elName.equals(ADDEDHYDROGEN_EL)){ hydrogenElements.add(currentEl); + currentEl.detach(); } - else if (elName.equals(INDICATEDHYDROGEN_EL)){ - hydrogenElements.add(currentEl); + else if (elName.equals(ISOTOPESPECIFICATION_EL)){ + isotopeSpecifications.add(currentEl); } - else if (elName.equals(ADDEDHYDROGEN_EL)){ - 
hydrogenElements.add(currentEl); + } + + if (hydrogenElements.size() > 0) { + applyUnlocantedHydro(state, frag, hydrogenElements); + } + + if (unsaturationBondOrders.size() > 0){ + unsaturateBonds(state, frag, unsaturationBondOrders); + } + + if (heteroatoms.size() > 0) { + applyUnlocantedHeteroatoms(state, frag, heteroatoms); + } + + if (isotopeSpecifications.size() > 0) { + applyIsotopeSpecifications(state, frag, isotopeSpecifications, false); + } + + if (frag.getOutAtomCount() > 0){//assign any outAtoms that have not been set to a specific atom to a specific atom + for (int i = 0, l = frag.getOutAtomCount(); i < l; i++) { + OutAtom outAtom = frag.getOutAtom(i); + if (!outAtom.isSetExplicitly()){ + outAtom.setAtom(findAtomForUnlocantedRadical(state, frag, outAtom)); + outAtom.setSetExplicitly(true); + } } } + } - if (hydrogenElements.size()>0){ - /* - * This function is not entirely straightforward as certain atoms definitely should have their spare valency reduced - * However names are not consistent as to whether they bother having the hydro tags do this! - * The atoms in atomsWithSV are in atom order those that can take a hydro element and then those that shouldn't really take a hydro element as its absence is unambiguous - */ - LinkedList atomsWithSV = new LinkedList(); - LinkedList atomsWhichImplicitlyWillHaveTheirSVRemoved = new LinkedList(); - for (Atom atom : atomList) { - if (atom.getType().equals(SUFFIX_TYPE_VAL)){ - break; + private static void applyUnlocantedHydro(BuildState state, Fragment frag, List hydrogenElements) throws StructureBuildingException { + /* + * This function is not entirely straightforward as certain atoms definitely should have their spare valency reduced + * However names are not consistent as to whether they bother having the hydro tags do this! 
+ * The atoms in atomsWithSV are in atom order those that can take a hydro element and then those that shouldn't really take a hydro element as its absence is unambiguous + */ + List atomsAcceptingHydroPrefix = new ArrayList(); + Set atomsWhichImplicitlyHadTheirSVRemoved = new HashSet(); + List atomList = frag.getAtomList(); + for (Atom atom : atomList) { + if (atom.getType().equals(SUFFIX_TYPE_VAL)){ + continue; + } + atom.ensureSVIsConsistantWithValency(false);//doesn't take into account suffixes + if (atom.hasSpareValency()) { + atomsAcceptingHydroPrefix.add(atom); + //if we take into account suffixes is the SV removed + atom.ensureSVIsConsistantWithValency(true); + if (!atom.hasSpareValency()) { + atomsWhichImplicitlyHadTheirSVRemoved.add(atom); + } + } + } + + int hydrogenElsCount = hydrogenElements.size(); + for (Element hydrogenElement : hydrogenElements) { + if (hydrogenElement.getValue().equals("perhydro")) { + if (hydrogenElsCount != 1){ + throw new StructureBuildingException("Unexpected indication of hydrogen when perhydro makes such indication redundnant"); } - atom.ensureSVIsConsistantWithValency(false);//doesn't take into account suffixes - if (atom.hasSpareValency()){ - if (atomWillHaveSVImplicitlyRemoved(atom)){ - atomsWhichImplicitlyWillHaveTheirSVRemoved.add(atom); + for (Atom atom : atomsAcceptingHydroPrefix) { + atom.setSpareValency(false); + } + return; + } + } + + List atomsWithDefiniteSV = new ArrayList(); + List otherAtomsThatCanHaveHydro = new ArrayList(); + for(Atom a : atomsAcceptingHydroPrefix) { + if (atomsWhichImplicitlyHadTheirSVRemoved.contains(a)) { + otherAtomsThatCanHaveHydro.add(a); + } + else { + boolean canFormDoubleBond = false; + for(Atom aa : frag.getIntraFragmentAtomNeighbours(a)) { + if(aa.hasSpareValency()) { + canFormDoubleBond = true; + break; + } + } + if (canFormDoubleBond) { + atomsWithDefiniteSV.add(a); + } + else { + otherAtomsThatCanHaveHydro.add(a); + } + } + } + List prioritisedAtomsAcceptingHydro = new 
ArrayList(atomsWithDefiniteSV); + prioritisedAtomsAcceptingHydro.addAll(otherAtomsThatCanHaveHydro);//these end up at the end of the list + + if (hydrogenElsCount > prioritisedAtomsAcceptingHydro.size()) { + throw new StructureBuildingException("Cannot find atom to add hydrogen to (" + + hydrogenElsCount + " hydrogens requested but only " + prioritisedAtomsAcceptingHydro.size() +" positions that can be hydrogenated)" ); + } + + int svCountAfterRemoval = atomsWithDefiniteSV.size() - hydrogenElsCount; + if (svCountAfterRemoval > 1) { //ambiguity likely. If it's 1 then an atom will be implicitly hydrogenated + //NOTE: as hydrogens as added in pairs the unambiguous if one hydrogen is added and allow atoms are identical condition is unlikely to be ever satisfied + if (!(AmbiguityChecker.allAtomsEquivalent(atomsWithDefiniteSV) && + (hydrogenElsCount == 1 || hydrogenElsCount == atomsWithDefiniteSV.size() - 1))) { + state.addIsAmbiguous("Ambiguous choice of positions to add hydrogen to on " + frag.getTokenEl().getValue()); + } + } + + for (int i = 0; i < hydrogenElsCount; i++) { + prioritisedAtomsAcceptingHydro.get(i).setSpareValency(false); + } + } + + private static void unsaturateBonds(BuildState state, Fragment frag, List unsaturationBondOrders) throws StructureBuildingException { + int tripleBonds = 0; + int doublebonds = 0; + for (Integer bondOrder : unsaturationBondOrders) { + if (bondOrder == 3) { + tripleBonds++; + } + else if (bondOrder == 2) { + doublebonds++; + } + else { + throw new RuntimeException("Unexpected unsaturation bon order: " + bondOrder); + } + } + + if (tripleBonds > 0) { + unsaturateBonds(state, frag, 3, tripleBonds); + } + if (doublebonds > 0) { + unsaturateBonds(state, frag, 2, doublebonds); + } + } + + private static void unsaturateBonds(BuildState state, Fragment frag, int bondOrder, int numToUnsaturate) throws StructureBuildingException { + List bondsThatCouldBeUnsaturated = findBondsToUnSaturate(frag, bondOrder, false); + List 
alternativeBondsThatCouldBeUnsaturated = Collections.emptyList(); + if (bondsThatCouldBeUnsaturated.size() < numToUnsaturate){ + bondsThatCouldBeUnsaturated = findBondsToUnSaturate(frag, bondOrder, true); + } + else { + alternativeBondsThatCouldBeUnsaturated = findAlternativeBondsToUnSaturate(frag, bondOrder, bondsThatCouldBeUnsaturated); + } + if (bondsThatCouldBeUnsaturated.size() < numToUnsaturate){ + throw new StructureBuildingException("Failed to find bond to change to a bond of order: " + bondOrder); + } + if (bondsThatCouldBeUnsaturated.size() > numToUnsaturate) { + //by convention cycloalkanes can have one unsaturation implicitly at the 1 locant + //terms like oxazoline are formally ambiguous but in practice the lowest locant is the one that will be intended (in this case 2-oxazoline) + if (!isCycloAlkaneSpecialCase(frag, numToUnsaturate, bondsThatCouldBeUnsaturated) && + !HANTZSCHWIDMAN_SUBTYPE_VAL.equals(frag.getSubType())) { + if (alternativeBondsThatCouldBeUnsaturated.size() >= numToUnsaturate) { + List allBonds = new ArrayList(bondsThatCouldBeUnsaturated); + allBonds.addAll(alternativeBondsThatCouldBeUnsaturated); + if (!(AmbiguityChecker.allBondsEquivalent(allBonds) && + numToUnsaturate == 1 )) { + state.addIsAmbiguous("Unsaturation of bonds of " + frag.getTokenEl().getValue()); + } + } + else { + if (!(AmbiguityChecker.allBondsEquivalent(bondsThatCouldBeUnsaturated) && + (numToUnsaturate == 1 || numToUnsaturate == bondsThatCouldBeUnsaturated.size() - 1))){ + state.addIsAmbiguous("Unsaturation of bonds of " + frag.getTokenEl().getValue()); + } + } + } + } + for (int i = 0; i < numToUnsaturate; i++) { + bondsThatCouldBeUnsaturated.get(i).setOrder(bondOrder); + } + } + + private static boolean isCycloAlkaneSpecialCase(Fragment frag, int numToUnsaturate, List bondsThatCouldBeUnsaturated) { + if (numToUnsaturate == 1) { + Bond b = bondsThatCouldBeUnsaturated.get(0); + Atom a1 = b.getFromAtom(); + Atom a2 = b.getToAtom(); + if 
((ALKANESTEM_SUBTYPE_VAL.equals(frag.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(frag.getSubType())) && + a1.getAtomIsInACycle() && a2.getAtomIsInACycle() && + (a1.equals(frag.getFirstAtom()) || a2.equals(frag.getFirstAtom()))) { + //mono unsaturated cyclo alkanes are unambiguous e.g. cyclohexene + return true; + } + } + return false; + } + + private static boolean isCycloAlkaneHeteroatomSpecialCase(Fragment frag, int numHeteroatoms, List atomsThatCouldBeReplaced) { + if (numHeteroatoms == 1) { + if ((ALKANESTEM_SUBTYPE_VAL.equals(frag.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(frag.getSubType())) && + frag.getFirstAtom().getAtomIsInACycle() && atomsThatCouldBeReplaced.get(0).equals(frag.getFirstAtom())) { + //single heteroatom implicitly goes to 1 position + return true; + } + } + return false; + } + + private static class HeteroAtomSmilesAndLambda { + private final String smiles; + private final String lambdaConvention; + + public HeteroAtomSmilesAndLambda(String smiles, String lambdaConvention) { + this.smiles = smiles; + this.lambdaConvention = lambdaConvention; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime + * result + + ((lambdaConvention == null) ? 0 : lambdaConvention + .hashCode()); + result = prime * result + + ((smiles == null) ? 
0 : smiles.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + HeteroAtomSmilesAndLambda other = (HeteroAtomSmilesAndLambda) obj; + if (lambdaConvention == null) { + if (other.lambdaConvention != null) + return false; + } else if (!lambdaConvention.equals(other.lambdaConvention)) + return false; + if (smiles == null) { + if (other.smiles != null) + return false; + } else if (!smiles.equals(other.smiles)) + return false; + return true; + } + + + } + + private static void applyUnlocantedHeteroatoms(BuildState state, Fragment frag, List heteroatoms) throws StructureBuildingException { + Map heteroatomDescriptionToCount = new HashMap(); + for (Element heteroatomEl : heteroatoms) { + String smiles = heteroatomEl.getAttributeValue(VALUE_ATR); + String lambdaConvention = heteroatomEl.getAttributeValue(LAMBDA_ATR); + HeteroAtomSmilesAndLambda desc = new HeteroAtomSmilesAndLambda(smiles, lambdaConvention); + Integer count = heteroatomDescriptionToCount.get(desc); + heteroatomDescriptionToCount.put(desc, count != null ? 
count + 1 : 1); + } + List atomlist = frag.getAtomList(); + for (Entry entry : heteroatomDescriptionToCount.entrySet()) { + HeteroAtomSmilesAndLambda desc = entry.getKey(); + int replacementsRequired = entry.getValue(); + Atom heteroatom = state.fragManager.getHeteroatom(desc.smiles); + ChemEl heteroatomChemEl = heteroatom.getElement(); + //finds an atom for which changing it to the specified heteroatom will not cause valency to be violated + List atomsThatCouldBeReplaced = new ArrayList(); + for (Atom atom : atomlist) { + if (atom.getType().equals(SUFFIX_TYPE_VAL)) { + continue; + } + if ((heteroatomChemEl.equals(atom.getElement()) && heteroatom.getCharge() == atom.getCharge())){ + continue;//replacement would do nothing + } + if(atom.getElement() != ChemEl.C && heteroatomChemEl != ChemEl.C){ + if (atom.getElement() == ChemEl.O && (heteroatomChemEl == ChemEl.S || heteroatomChemEl == ChemEl.Se || heteroatomChemEl == ChemEl.Te)) { + //by special case allow replacement of oxygen by chalcogen } else{ - atomsWithSV.add(atom); + //replacement of heteroatom by another heteroatom + continue; } } + if (ValencyChecker.checkValencyAvailableForReplacementByHeteroatom(atom, heteroatom)) { + atomsThatCouldBeReplaced.add(atom); + } + } + if (atomsThatCouldBeReplaced.size() < replacementsRequired){ + throw new StructureBuildingException("Cannot find suitable atom for heteroatom replacement"); } - atomsWithSV.addAll(atomsWhichImplicitlyWillHaveTheirSVRemoved);//these end up at the end of the list - boolean saturateAllAtoms =false; - for (Element hydrogenElement : hydrogenElements) { - if (hydrogenElement.getValue().equals("perhydro")){ - saturateAllAtoms =true; - hydrogenElement.detach(); + + if (atomsThatCouldBeReplaced.size() > replacementsRequired && !isCycloAlkaneHeteroatomSpecialCase(frag, replacementsRequired, atomsThatCouldBeReplaced)) { + if (!(AmbiguityChecker.allAtomsEquivalent(atomsThatCouldBeReplaced) && + (replacementsRequired == 1 || replacementsRequired == 
atomsThatCouldBeReplaced.size() - 1))) { + //by convention cycloalkanes can have one unsaturation implicitly at the 1 locant + state.addIsAmbiguous("Heteroatom replacement on " + frag.getTokenEl().getValue()); } } - if (saturateAllAtoms){ - if (hydrogenElements.size() != 1){ - throw new StructureBuildingException("Unexpected indication of hydrogen when perhydro makes such indication redundnant"); + + for (int i = 0; i < replacementsRequired; i++) { + Atom atomToReplaceWithHeteroAtom = atomsThatCouldBeReplaced.get(i); + state.fragManager.replaceAtomWithAtom(atomToReplaceWithHeteroAtom, heteroatom, true); + if (desc.lambdaConvention != null) { + atomToReplaceWithHeteroAtom.setLambdaConventionValency(Integer.parseInt(desc.lambdaConvention)); } - for (Atom atomToReduceSpareValencyOn : atomsWithSV) { - atomToReduceSpareValencyOn.setSpareValency(false); - } } - else{ - if (hydrogenElements.size()> atomsWithSV.size()){ - throw new StructureBuildingException("Cannot find atom to add hydrogen to (" + - hydrogenElements.size() + " hydrogen adding tags but only " + atomsWithSV.size() +" positions that can be hydrogenated)" ); - } - for (Element hydrogenElement : hydrogenElements) { - Atom atomToReduceSpareValencyOn = atomsWithSV.removeFirst(); - atomToReduceSpareValencyOn.setSpareValency(false); - hydrogenElement.detach(); - } - } - } - - for (Element unsaturator : unsaturators) { - int bondOrder = Integer.parseInt(unsaturator.getAttributeValue(VALUE_ATR)); - if (bondOrder <= 1) { - unsaturator.detach(); - continue; - } - - //checks if both atoms can accept an extra bond (if double bond) or two extra bonds (if triple bond) - Bond bondToUnsaturate = findBondToUnSaturate(atomList, bondOrder, false); - if (bondToUnsaturate ==null){ - bondToUnsaturate = findBondToUnSaturate(atomList, bondOrder, true); - } - if (bondToUnsaturate ==null){ - throw new StructureBuildingException("Cannot find bond to unsaturate using unsaturator: " +unsaturator.getValue()); - } - 
bondToUnsaturate.setOrder(bondOrder); - unsaturator.detach(); - } - int atomIndice =0; - - for (Element heteroatomEl : heteroatoms) { - Atom heteroatom = state.fragManager.getHeteroatom(heteroatomEl.getAttributeValue(VALUE_ATR)); - String heteroatomSymbol = heteroatom.getElement(); - //finds an atom for which changing it to the specified heteroatom will not cause valency to be violated - Atom atomToReplaceWithHeteroAtom =null; - for (; atomIndice < atomList.size(); atomIndice++) { - Atom possibleAtom = atomList.get(atomIndice); - if (possibleAtom.getType().equals(SUFFIX_TYPE_VAL)) { - continue; - } - if ((heteroatomSymbol.equals(possibleAtom.getElement()) && heteroatom.getCharge() == possibleAtom.getCharge())){ - continue;//replacement would do nothing - } - if(!possibleAtom.getElement().equals("C") && !heteroatomSymbol.equals("C")){ - if (possibleAtom.getElement().equals("O") && (heteroatomSymbol.equals("S") || heteroatomSymbol.equals("Se") || heteroatomSymbol.equals("Te"))){ - //special case for replacement of oxygen by chalcogen - } - else{ - //replacement of heteroatom by another heteroatom - continue; - } - } - if (ValencyChecker.checkValencyAvailableForReplacementByHeteroatom(possibleAtom, heteroatom)) { - atomToReplaceWithHeteroAtom = possibleAtom; - break; - } - } - if (atomToReplaceWithHeteroAtom == null){ - throw new StructureBuildingException("Cannot find suitable atom for heteroatom replacement"); - } - - state.fragManager.replaceAtomWithAtom(atomToReplaceWithHeteroAtom, heteroatom, true); - if (heteroatomEl.getAttribute(LAMBDA_ATR) != null) { - atomToReplaceWithHeteroAtom.setLambdaConventionValency(Integer.parseInt(heteroatomEl.getAttributeValue(LAMBDA_ATR))); - } - atomIndice++; - heteroatomEl.detach(); - } - if (thisFrag.getOutAtomCount() > 0){//assign any outAtoms that have not been set to a specific atom to a specific atom - for (int i = 0, l = thisFrag.getOutAtomCount(); i < l; i++) { - OutAtom outAtom = thisFrag.getOutAtom(i); - if 
(!outAtom.isSetExplicitly()){ - Atom atomToAssociateOutAtomWith = thisFrag.getAtomOrNextSuitableAtom(outAtom.getAtom(), outAtom.getValency(), true); - if (atomToAssociateOutAtomWith==null){ - throw new StructureBuildingException("Failed to assign all unlocanted radicals to actual atoms without violating valency"); - } - outAtom.setAtom(atomToAssociateOutAtomWith); - outAtom.setSetExplicitly(true); + } + } + + private static void applyIsotopeSpecifications(BuildState state, Fragment frag, List isotopeSpecifications, boolean applyLocanted) throws StructureBuildingException { + for(int i = isotopeSpecifications.size() - 1; i >= 0; i--) { + Element isotopeSpecification = isotopeSpecifications.get(i); + IsotopeSpecification isotopeSpec = IsotopeSpecificationParser.parseIsotopeSpecification(isotopeSpecification); + String[] locants = isotopeSpec.getLocants(); + if(locants != null) { + if (!applyLocanted) { + continue; + } + } + else if (applyLocanted) { + continue; + } + + ChemEl chemEl = isotopeSpec.getChemEl(); + int isotope = isotopeSpec.getIsotope(); + if(locants != null) { + if (chemEl == ChemEl.H) { + for (int j = 0; j < locants.length; j++) { + Atom atomWithHydrogenIsotope = frag.getAtomByLocantOrThrow(locants[j]); + Atom hydrogen = state.fragManager.createAtom(isotopeSpec.getChemEl(), frag); + hydrogen.setIsotope(isotope); + state.fragManager.createBond(atomWithHydrogenIsotope, hydrogen, 1); + } + } + else { + for (int j = 0; j < locants.length; j++) { + Atom atom = frag.getAtomByLocantOrThrow(locants[j]); + if (chemEl != atom.getElement()) { + throw new StructureBuildingException("The atom at locant: " + locants[j] + " was not a " + chemEl.toString() ); + } + atom.setIsotope(isotope); + } } } + else { + int multiplier = isotopeSpec.getMultiplier(); + if (chemEl == ChemEl.H) { + List parentAtomsToApplyTo = FragmentTools.findnAtomsForSubstitution(frag, multiplier, 1); + if (parentAtomsToApplyTo == null){ + throw new StructureBuildingException("Failed to find 
sufficient hydrogen atoms for unlocanted hydrogen isotope replacement"); + } + if (AmbiguityChecker.isSubstitutionAmbiguous(parentAtomsToApplyTo, multiplier)) { + if (!casIsotopeAmbiguitySpecialCase(frag, parentAtomsToApplyTo, multiplier)) { + state.addIsAmbiguous("Position of hydrogen isotope on " + frag.getTokenEl().getValue()); + } + } + for (int j = 0; j < multiplier; j++) { + Atom atomWithHydrogenIsotope = parentAtomsToApplyTo.get(j); + Atom hydrogen = state.fragManager.createAtom(isotopeSpec.getChemEl(), frag); + hydrogen.setIsotope(isotope); + state.fragManager.createBond(atomWithHydrogenIsotope, hydrogen, 1); + } + } + else { + List parentAtomsToApplyTo = new ArrayList(); + for (Atom atom : frag.getAtomList()) { + if (atom.getElement() == chemEl) { + parentAtomsToApplyTo.add(atom); + } + } + if (parentAtomsToApplyTo.size() < multiplier) { + throw new StructureBuildingException("Failed to find sufficient atoms for " + chemEl.toString() + " isotope replacement"); + } + if (AmbiguityChecker.isSubstitutionAmbiguous(parentAtomsToApplyTo, multiplier)) { + state.addIsAmbiguous("Position of isotope on " + frag.getTokenEl().getValue()); + } + for (int j = 0; j < multiplier; j++) { + parentAtomsToApplyTo.get(j).setIsotope(isotope); + } + } + } + isotopeSpecification.detach(); + } + } + + private static boolean casIsotopeAmbiguitySpecialCase(Fragment frag, List parentAtomsToApplyTo, int multiplier) throws StructureBuildingException { + if (multiplier !=1) { + return false; + } + List atoms = frag.getAtomList(); + Atom firstAtom = atoms.get(0); + if (!parentAtomsToApplyTo.get(0).equals(firstAtom)) { + return false; + } + ChemEl firstAtomEl = firstAtom.getElement(); + if (atoms.size() ==2) { + if (firstAtomEl == atoms.get(1).getElement()) { + //e.g. 
ethane + return true; + } + } + else { + int intraFragValency = frag.getIntraFragmentIncomingValency(firstAtom); + boolean spareValency = firstAtom.hasSpareValency(); + if (firstAtom.getAtomIsInACycle()) { + for (int i = 1; i < atoms.size(); i++) { + Atom atom = atoms.get(i); + if (atom.getElement() != firstAtomEl){ + return false; + } + if (frag.getIntraFragmentIncomingValency(atom) != intraFragValency){ + return false; + } + if (atom.hasSpareValency() != spareValency){ + return false; + } + } + //e.g. benzene + return true; + } } + return false; } + static Atom findAtomForUnlocantedRadical(BuildState state, Fragment frag, OutAtom outAtom) throws StructureBuildingException { + List possibleAtoms = FragmentTools.findnAtomsForSubstitution(frag, outAtom.getAtom(), 1, outAtom.getValency(), true); + if (possibleAtoms == null){ + throw new StructureBuildingException("Failed to assign all unlocanted radicals to actual atoms without violating valency"); + } + if (!((ALKANESTEM_SUBTYPE_VAL.equals(frag.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(frag.getSubType())) && possibleAtoms.get(0).equals(frag.getFirstAtom()))) { + if (AmbiguityChecker.isSubstitutionAmbiguous(possibleAtoms, 1)) { + state.addIsAmbiguous("Positioning of radical on: " + frag.getTokenEl().getValue()); + } + } + return possibleAtoms.get(0); + } + + + private static List findAlternativeBondsToUnSaturate(Fragment frag, int bondOrder, Collection bondsToIgnore) { + return findBondsToUnSaturate(frag, bondOrder, false, new HashSet(bondsToIgnore)); + } /** - * Attempts to find a bond that can have its bondOrder increased to the specified bond order + * Finds bond within the fragment that can have their bondOrder increased to the specified bond order * Depending on the value of allowAdjacentUnsaturatedBonds adjacent higher bonds are prevented - * @param atomList + * @param frag * @param bondOrder * @param allowAdjacentUnsaturatedBonds * @return */ - static Bond findBondToUnSaturate(List atomList, int 
bondOrder, boolean allowAdjacentUnsaturatedBonds) { - Bond bondToUnsaturate =null; - mainLoop: for (Atom atom1 : atomList) { + static List findBondsToUnSaturate(Fragment frag, int bondOrder, boolean allowAdjacentUnsaturatedBonds) { + return findBondsToUnSaturate(frag, bondOrder, allowAdjacentUnsaturatedBonds, Collections.emptySet()); + } + + private static List findBondsToUnSaturate(Fragment frag, int bondOrder, boolean allowAdjacentUnsaturatedBonds, Set bondsToIgnore) { + List bondsToUnsaturate = new ArrayList(); + mainLoop: for (Atom atom1 : frag.getAtomList()) { + if (atom1.hasSpareValency() || SUFFIX_TYPE_VAL.equals(atom1.getType()) || atom1.getProperty(Atom.ISALDEHYDE) !=null) { + continue; + } List bonds = atom1.getBonds(); - if (!allowAdjacentUnsaturatedBonds){ - for (Bond bond : bonds) { - if (bond.getOrder()!=1){//don't place implicitly unsaturated bonds next to each other + int incomingValency = 0; + for (Bond bond : bonds) { + //don't place implicitly unsaturated bonds next to each other + if (bond.getOrder() != 1 && !allowAdjacentUnsaturatedBonds) { + continue mainLoop; + } + if (bondsToUnsaturate.contains(bond)) { + if (!allowAdjacentUnsaturatedBonds) { continue mainLoop; } + incomingValency += bondOrder; + } + else { + incomingValency += bond.getOrder(); } } + + Integer maxVal = getLambdaValencyOrHwValencyOrMaxValIfCharged(atom1); + if(maxVal != null && (incomingValency + (bondOrder - 1) + atom1.getOutValency()) > maxVal) { + continue; + } bondLoop: for (Bond bond : bonds) { - if (bond.getOrder()==1 && !atom1.hasSpareValency() && !SUFFIX_TYPE_VAL.equals(atom1.getType()) && atom1.getProperty(Atom.ISALDEHYDE) ==null - && ValencyChecker.checkValencyAvailableForBond(atom1, bondOrder - 1 + atom1.getOutValency())){ + if (bond.getOrder() == 1 && !bondsToUnsaturate.contains(bond) && !bondsToIgnore.contains(bond)) { Atom atom2 = bond.getOtherAtom(atom1); - if (!allowAdjacentUnsaturatedBonds){ - for (Bond bond2 : atom2.getBonds()) { - if 
(bond2.getOrder()!=1){//don't place implicitly unsaturated bonds next to each other - continue bondLoop; - } - } - } - if (!atom2.hasSpareValency() && !SUFFIX_TYPE_VAL.equals(atom2.getType()) && atom2.getProperty(Atom.ISALDEHYDE) ==null - && ValencyChecker.checkValencyAvailableForBond(atom2, bondOrder - 1 + atom2.getOutValency())){ - bondToUnsaturate = bond; - break mainLoop; + if (frag.getAtomByID(atom2.getID()) != null) {//check other atom is actually in the fragment! + if (atom2.hasSpareValency() || SUFFIX_TYPE_VAL.equals(atom2.getType()) || atom2.getProperty(Atom.ISALDEHYDE) !=null) { + continue; + } + int incomingValency2 = 0; + for (Bond bond2 : atom2.getBonds()) { + //don't place implicitly unsaturated bonds next to each other + if (bond2.getOrder() != 1 && !allowAdjacentUnsaturatedBonds) { + continue bondLoop; + } + if (bondsToUnsaturate.contains(bond2)) { + if (!allowAdjacentUnsaturatedBonds) { + continue bondLoop; + } + incomingValency2 += bondOrder; + } + else { + incomingValency2 += bond2.getOrder(); + } + } + + Integer maxVal2 = getLambdaValencyOrHwValencyOrMaxValIfCharged(atom2); + if(maxVal2 != null && (incomingValency2 + (bondOrder - 1) + atom2.getOutValency()) > maxVal2) { + continue; + } + bondsToUnsaturate.add(bond); + break bondLoop; } } } } - return bondToUnsaturate; + return bondsToUnsaturate; } - - private static boolean atomWillHaveSVImplicitlyRemoved(Atom atom) throws StructureBuildingException { - boolean canFormDoubleBond =false; - for(Atom aa : atom.getFrag().getIntraFragmentAtomNeighbours(atom)) { - if(aa.hasSpareValency()){ - canFormDoubleBond=true; - } + + + /** + * Return the lambda convention derived valency + protons if set + * Otherwise if charge is 0 returns {@link ValencyChecker#getHWValency(ChemEl)} + * Otherwise return {@link ValencyChecker#getMaximumValency(ChemEl, int)} + * Returns null if the maximum valency is not known + * @param a + * @return + */ + static Integer getLambdaValencyOrHwValencyOrMaxValIfCharged(Atom a) { + 
if (a.getLambdaConventionValency() != null) { + return a.getLambdaConventionValency() + a.getProtonsExplicitlyAddedOrRemoved(); } - if (!canFormDoubleBond){ - return true; + else if (a.getCharge() == 0){ + return ValencyChecker.getHWValency(a.getElement()); } - - if (atom.hasSpareValency()){ - atom.ensureSVIsConsistantWithValency(true); - if (!atom.hasSpareValency()){ - atom.setSpareValency(true); - return true; - } + else { + return ValencyChecker.getMaximumValency(a.getElement(), a.getCharge()); } - return false; } private static void performAdditiveOperations(BuildState state, Element subBracketOrRoot) throws StructureBuildingException { - if (subBracketOrRoot.getAttribute(LOCANT_ATR)!=null){//additive nomenclature does not employ locants + if (subBracketOrRoot.getAttribute(LOCANT_ATR) != null){//additive nomenclature does not employ locants return; } Element group; - if (subBracketOrRoot.getLocalName().equals(BRACKET_EL)){ + if (subBracketOrRoot.getName().equals(BRACKET_EL)){ group =findRightMostGroupInBracket(subBracketOrRoot); } else{ group =subBracketOrRoot.getFirstChildElement(GROUP_EL); } - if (group.getAttribute(RESOLVED_ATR)!=null){ + if (group.getAttribute(RESOLVED_ATR) != null){ return; } - Fragment frag = state.xmlFragmentMap.get(group); + Fragment frag = group.getFrag(); int outAtomCount = frag.getOutAtomCount(); if (outAtomCount >=1){ if (subBracketOrRoot.getAttribute(MULTIPLIER_ATR) ==null){ - Element nextSiblingEl = (Element) XOMTools.getNextSibling(subBracketOrRoot); - if (nextSiblingEl.getAttribute(MULTIPLIER_ATR)!=null && + Element nextSiblingEl = OpsinTools.getNextSibling(subBracketOrRoot); + if (nextSiblingEl.getAttribute(MULTIPLIER_ATR) != null && (outAtomCount >= Integer.parseInt(nextSiblingEl.getAttributeValue(MULTIPLIER_ATR)) || //probably multiplicative nomenclature, should be as many outAtoms as the multiplier outAtomCount==1 && frag.getOutAtom(0).getValency()==Integer.parseInt(nextSiblingEl.getAttributeValue(MULTIPLIER_ATR))) && - 
hasRootLikeOrMultiRadicalGroup(state, nextSiblingEl)){ + hasRootLikeOrMultiRadicalGroup(nextSiblingEl)){ if (outAtomCount==1){//special case e.g. 4,4'-(benzylidene)dianiline FragmentTools.splitOutAtomIntoValency1OutAtoms(frag.getOutAtom(0)); //special case where something like benzylidene is being used as if it meant benzdiyl for multiplicative nomenclature @@ -861,26 +1401,26 @@ } performMultiplicativeOperations(state, group, nextSiblingEl); } - else if (group.getAttribute(ISAMULTIRADICAL_ATR)!=null){//additive nomenclature e.g. ethyleneoxy - Fragment nextFrag = getNextInScopeMultiValentFragment(state, subBracketOrRoot); - if (nextFrag!=null){ - Element nextMultiRadicalGroup = state.xmlFragmentMap.getElement(nextFrag); - Element parentSubOrRoot = (Element) nextMultiRadicalGroup.getParent(); + else if (group.getAttribute(ISAMULTIRADICAL_ATR) != null){//additive nomenclature e.g. ethyleneoxy + Fragment nextFrag = getNextInScopeMultiValentFragment(subBracketOrRoot); + if (nextFrag != null){ + Element nextMultiRadicalGroup = nextFrag.getTokenEl(); + Element parentSubOrRoot = nextMultiRadicalGroup.getParent(); if (state.currentWordRule != WordRule.polymer){//imino does not behave like a substituent in polymers only as a linker - if (nextMultiRadicalGroup.getAttribute(IMINOLIKE_ATR)!=null){//imino/methylene can just act as normal substituents, should an additive bond really be made??? - Fragment adjacentFrag =state.xmlFragmentMap.get(OpsinTools.getNextGroup(subBracketOrRoot)); + if (nextMultiRadicalGroup.getAttribute(IMINOLIKE_ATR) != null){//imino/methylene can just act as normal substituents, should an additive bond really be made??? 
+ Fragment adjacentFrag = OpsinTools.getNextGroup(subBracketOrRoot).getFrag(); - if (nextFrag !=adjacentFrag){//imino is not the absolute next frag - if (potentiallyCanSubstitute((Element) nextMultiRadicalGroup.getParent()) || potentiallyCanSubstitute((Element) nextMultiRadicalGroup.getParent().getParent())){ + if (nextFrag != adjacentFrag){//imino is not the absolute next frag + if (potentiallyCanSubstitute(nextMultiRadicalGroup.getParent()) || potentiallyCanSubstitute(nextMultiRadicalGroup.getParent().getParent())){ return; } } } - if (group.getAttribute(IMINOLIKE_ATR)!=null && levelsToWordEl(group) > levelsToWordEl(nextMultiRadicalGroup)){ + if (group.getAttribute(IMINOLIKE_ATR) != null && levelsToWordEl(group) > levelsToWordEl(nextMultiRadicalGroup)){ return;//e.g. imino substitutes ((chloroimino)ethylene)dibenzene } } - if (parentSubOrRoot.getAttribute(MULTIPLIER_ATR)!=null){ + if (parentSubOrRoot.getAttribute(MULTIPLIER_ATR) != null){ throw new StructureBuildingException("Attempted to form additive bond to a multiplied component"); } group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); @@ -888,11 +1428,11 @@ } } else {//e.g. 
chlorocarbonyl or hydroxy(sulfanyl)phosphoryl - List siblingFragments = findAlternativeFragments(state, subBracketOrRoot); + List siblingFragments = findAlternativeFragments(subBracketOrRoot); if (siblingFragments.size()>0){ Fragment nextFrag = siblingFragments.get(siblingFragments.size()-1); - Element nextGroup = state.xmlFragmentMap.getElement(nextFrag); - if (nextGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR)!=null && nextGroup.getAttribute(ISAMULTIRADICAL_ATR)!=null && (nextFrag.getOutAtomCount()>1|| nextGroup.getAttribute(RESOLVED_ATR)!=null && nextFrag.getOutAtomCount()>=1 )){ + Element nextGroup = nextFrag.getTokenEl(); + if (nextGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && nextGroup.getAttribute(ISAMULTIRADICAL_ATR) != null && (nextFrag.getOutAtomCount()>1|| nextGroup.getAttribute(RESOLVED_ATR) != null && nextFrag.getOutAtomCount()>=1 )){ Atom toAtom = nextFrag.getOutAtom(0).getAtom(); if (calculateSubstitutableHydrogenAtoms(toAtom) ==0){ group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); @@ -902,8 +1442,8 @@ if (group.getAttribute(RESOLVED_ATR)==null && siblingFragments.size()>1){ for (int i = 0; i< siblingFragments.size()-1; i++) { Fragment lastFrag = siblingFragments.get(i); - Element lastGroup = state.xmlFragmentMap.getElement(lastFrag); - if (lastGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR)!=null && lastGroup.getAttribute(ISAMULTIRADICAL_ATR)!=null && (lastFrag.getOutAtomCount()>1|| lastGroup.getAttribute(RESOLVED_ATR)!=null && lastFrag.getOutAtomCount()>=1 )){ + Element lastGroup = lastFrag.getTokenEl(); + if (lastGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && lastGroup.getAttribute(ISAMULTIRADICAL_ATR) != null && (lastFrag.getOutAtomCount()>1|| lastGroup.getAttribute(RESOLVED_ATR) != null && lastFrag.getOutAtomCount()>=1 )){ Atom toAtom = lastFrag.getOutAtom(0).getAtom(); if (calculateSubstitutableHydrogenAtoms(toAtom) ==0){ group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); @@ -913,7 +1453,7 @@ } //loop may continue if 
lastFrag was in fact completely unsubstitutable e.g. hydroxy...phosphoryloxy. The oxy is unsubstituable as the phosphoryl will already have bonded to it - if (lastFrag.getAtomOrNextSuitableAtom(lastFrag.getDefaultInAtom(), frag.getOutAtom(outAtomCount-1).getValency(), true)!=null){ + if (FragmentTools.findSubstituableAtoms(lastFrag, frag.getOutAtom(outAtomCount - 1).getValency()).size() > 0) { break; } } @@ -922,12 +1462,12 @@ } } else{// e.g. dimethoxyphosphoryl or bis(methylamino)phosphoryl - List siblingFragments = findAlternativeFragments(state, subBracketOrRoot); + List siblingFragments = findAlternativeFragments(subBracketOrRoot); if (siblingFragments.size()>0){ int multiplier = Integer.parseInt(subBracketOrRoot.getAttributeValue(MULTIPLIER_ATR)); Fragment nextFrag = siblingFragments.get(siblingFragments.size()-1); - Element nextGroup = state.xmlFragmentMap.getElement(nextFrag); - if (nextGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR)!=null && nextGroup.getAttribute(ISAMULTIRADICAL_ATR)!=null && (nextFrag.getOutAtomCount()>=multiplier|| nextGroup.getAttribute(RESOLVED_ATR)!=null && nextFrag.getOutAtomCount()>=multiplier +1 )){ + Element nextGroup = nextFrag.getTokenEl(); + if (nextGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && nextGroup.getAttribute(ISAMULTIRADICAL_ATR) != null && (nextFrag.getOutAtomCount()>=multiplier|| nextGroup.getAttribute(RESOLVED_ATR) != null && nextFrag.getOutAtomCount()>=multiplier +1 )){ Atom toAtom = nextFrag.getOutAtom(0).getAtom(); if (calculateSubstitutableHydrogenAtoms(toAtom) ==0){ group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); @@ -937,8 +1477,8 @@ if (group.getAttribute(RESOLVED_ATR)==null && siblingFragments.size()>1){ for (int i = 0; i< siblingFragments.size()-1; i++) { Fragment lastFrag = siblingFragments.get(i); - Element lastGroup = state.xmlFragmentMap.getElement(lastFrag); - if (lastGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR)!=null && lastGroup.getAttribute(ISAMULTIRADICAL_ATR)!=null && 
(lastFrag.getOutAtomCount()>=multiplier|| lastGroup.getAttribute(RESOLVED_ATR)!=null && lastFrag.getOutAtomCount()>=multiplier +1 )){ + Element lastGroup = lastFrag.getTokenEl(); + if (lastGroup.getAttribute(ACCEPTSADDITIVEBONDS_ATR) != null && lastGroup.getAttribute(ISAMULTIRADICAL_ATR) != null && (lastFrag.getOutAtomCount()>=multiplier|| lastGroup.getAttribute(RESOLVED_ATR) != null && lastFrag.getOutAtomCount()>=multiplier +1 )){ Atom toAtom = lastFrag.getOutAtom(0).getAtom(); if (calculateSubstitutableHydrogenAtoms(toAtom) ==0){ group.addAttribute(new Attribute(RESOLVED_ATR, "yes")); @@ -948,7 +1488,7 @@ } //loop may continue if lastFrag was in fact completely unsubstitutable e.g. hydroxy...phosphoryloxy. The oxy is unsubstituable as the phosphoryl will already have bonded to it - if (lastFrag.getAtomOrNextSuitableAtom(lastFrag.getDefaultInAtom(), frag.getOutAtom(outAtomCount-1).getValency(), true)!=null){ + if (FragmentTools.findSubstituableAtoms(lastFrag, frag.getOutAtom(outAtomCount - 1).getValency()).size() > 0) { break; } } @@ -960,19 +1500,18 @@ /** * Searches the input for something that either is a multiRadical or has no outAtoms i.e. 
not dimethyl - * @param state * @param subBracketOrRoot * @return */ - private static boolean hasRootLikeOrMultiRadicalGroup(BuildState state, Element subBracketOrRoot) { - List groups = XOMTools.getDescendantElementsWithTagName(subBracketOrRoot, GROUP_EL); - if (subBracketOrRoot.getAttribute(INLOCANTS_ATR)!=null){ + private static boolean hasRootLikeOrMultiRadicalGroup(Element subBracketOrRoot) { + List groups = OpsinTools.getDescendantElementsWithTagName(subBracketOrRoot, GROUP_EL); + if (subBracketOrRoot.getAttribute(INLOCANTS_ATR) != null){ return true;// a terminus with specified inLocants } for (Element group : groups) { - Fragment frag =state.xmlFragmentMap.get(group); + Fragment frag = group.getFrag(); int outAtomCount =frag.getOutAtomCount(); - if (group.getAttribute(ISAMULTIRADICAL_ATR)!=null){ + if (group.getAttribute(ISAMULTIRADICAL_ATR) != null){ if (outAtomCount >=1 ){ return true;//a multi radical } @@ -998,7 +1537,7 @@ List elementsNotToBeMultiplied = new ArrayList();//anything before the multiplier in the sub/bracket for (int i = multiplier -1; i >=0; i--) { Element currentElement; - if (i!=0){ + if (i != 0){ currentElement = state.fragManager.cloneElement(state, subOrBracket, i); addPrimesToLocantedStereochemistryElements(currentElement, StringTools.multiplyString("'", i));//Stereochemistry elements with locants will need to have their locants primed (stereochemistry is only processed after structure building) clonedElements.add(currentElement); @@ -1010,27 +1549,27 @@ throw new StructureBuildingException("Multiplier not found where multiplier expected"); } for (int j = subOrBracket.indexOf(multiplierEl) -1 ; j >=0 ; j--) { - Element el = (Element) subOrBracket.getChild(j); + Element el = subOrBracket.getChild(j); el.detach(); elementsNotToBeMultiplied.add(el); } multiplierEl.detach(); } Element group; - if (currentElement.getLocalName().equals(BRACKET_EL)){ + if (currentElement.getName().equals(BRACKET_EL)){ group = 
findRightMostGroupInBracket(currentElement); } else{ group = currentElement.getFirstChildElement(GROUP_EL); } - Fragment frag = state.xmlFragmentMap.get(group); - if (frag.getOutAtomCount() !=1 ){ + Fragment frag = group.getFrag(); + if (frag.getOutAtomCount() != 1 ){ throw new StructureBuildingException("Additive bond formation failure: Fragment expected to have one OutAtom in this case but had: "+ frag.getOutAtomCount()); } joinFragmentsAdditively(state, frag, fragToAdditivelyBondTo); } for (Element clone : clonedElements) {//make sure cloned substituents don't substitute onto each other! - XOMTools.insertAfter(subOrBracket, clone); + OpsinTools.insertAfter(subOrBracket, clone); } for (Element el : elementsNotToBeMultiplied) {//re-add anything before multiplier to original subOrBracket subOrBracket.insertChild(el, 0); @@ -1045,7 +1584,7 @@ * @throws StructureBuildingException */ private static void performMultiplicativeOperations(BuildState state, Element group, Element multipliedParent) throws StructureBuildingException{ - BuildResults multiRadicalBR = new BuildResults(state, (Element) group.getParent()); + BuildResults multiRadicalBR = new BuildResults(group.getParent()); performMultiplicativeOperations(state, multiRadicalBR, multipliedParent); } @@ -1062,8 +1601,8 @@ if (LOG.isTraceEnabled()){LOG.trace(multiplier +" multiplicative bonds to be formed");} multipliedParent.removeAttribute(multipliedParent.getAttribute(MULTIPLIER_ATR)); List inLocants = null; - if (multipliedParent.getAttribute(INLOCANTS_ATR)!=null){//true for the root of a multiplicative name - String inLocantsString = multipliedParent.getAttributeValue(INLOCANTS_ATR); + String inLocantsString = multipliedParent.getAttributeValue(INLOCANTS_ATR); + if (inLocantsString != null){//true for the root of a multiplicative name if (inLocantsString.equals(INLOCANTS_DEFAULT)){ inLocants = new ArrayList(multiplier); for (int i = 0; i < multiplier; i++) { @@ -1071,7 +1610,7 @@ } } else{ - inLocants = 
StringTools.arrayToList(MATCH_COMMA.split(inLocantsString)); + inLocants = StringTools.arrayToList(inLocantsString.split(",")); if (inLocants.size() != multiplier){ throw new StructureBuildingException("Mismatch between multiplier and number of inLocants in multiplicative nomenclature"); } @@ -1081,39 +1620,39 @@ BuildResults newBr = new BuildResults(); for (int i = multiplier -1; i >=0; i--) { Element multipliedElement; - if (i!=0){ + if (i != 0){ multipliedElement = state.fragManager.cloneElement(state, multipliedParent, i); addPrimesToLocantedStereochemistryElements(multipliedElement, StringTools.multiplyString("'", i));//Stereochemistry elements with locants will need to have their locants primed (stereochemistry is only processed after structure building) clonedElements.add(multipliedElement); } else{ - multipliedElement=multipliedParent; + multipliedElement = multipliedParent; } //determine group that will be additively bonded to Element multipliedGroup; - if (multipliedElement.getLocalName().equals(BRACKET_EL)){ - multipliedGroup =getFirstMultiValentGroup(state, multipliedElement); + if (multipliedElement.getName().equals(BRACKET_EL)) { + multipliedGroup = getFirstMultiValentGroup(multipliedElement); if (multipliedGroup == null){//root will not have a multivalent group - List groups = XOMTools.getDescendantElementsWithTagName(multipliedElement, GROUP_EL); - if (inLocants==null){ + List groups = OpsinTools.getDescendantElementsWithTagName(multipliedElement, GROUP_EL); + if (inLocants == null){ throw new StructureBuildingException("OPSIN Bug? 
in locants must be specified for a multiplied root in multiplicative nomenclature"); } if (inLocants.get(0).equals(INLOCANTS_DEFAULT)){ - multipliedGroup = groups.get(groups.size()-1); + multipliedGroup = groups.get(groups.size() - 1); } else{ groupLoop: for (int j = groups.size()-1; j >=0; j--) { - Fragment possibleFrag = state.xmlFragmentMap.get(groups.get(j)); + Fragment possibleFrag = groups.get(j).getFrag(); for (String locant : inLocants) { if (possibleFrag.hasLocant(locant)){ - multipliedGroup =groups.get(j); + multipliedGroup = groups.get(j); break groupLoop; } } } } - if (multipliedGroup==null){ + if (multipliedGroup == null){ throw new StructureBuildingException("Locants for inAtoms on the root were either misassigned to the root or were invalid: " + inLocants.toString() +" could not be assigned!"); } } @@ -1121,43 +1660,53 @@ else{ multipliedGroup = multipliedElement.getFirstChildElement(GROUP_EL); } - Fragment multipliedFrag = state.xmlFragmentMap.get(multipliedGroup); + Fragment multipliedFrag = multipliedGroup.getFrag(); - Fragment multiRadicalFrag = multiRadicalBR.getOutAtom(i).getAtom().getFrag(); - Element multiRadicalGroup = state.xmlFragmentMap.getElement(multiRadicalFrag); - if (multiRadicalGroup.getAttribute(RESOLVED_ATR)==null){ - resolveUnLocantedFeatures(state, (Element) multiRadicalGroup.getParent());//the addition of unlocanted unsaturators can effect the position of radicals e.g. diazenyl + OutAtom multiRadicalOutAtom = multiRadicalBR.getOutAtom(i); + Fragment multiRadicalFrag = multiRadicalOutAtom.getAtom().getFrag(); + Element multiRadicalGroup = multiRadicalFrag.getTokenEl(); + if (multiRadicalGroup.getAttribute(RESOLVED_ATR) == null){ + resolveUnLocantedFeatures(state, multiRadicalGroup.getParent());//the addition of unlocanted unsaturators can effect the position of radicals e.g. 
diazenyl multiRadicalGroup.addAttribute(new Attribute(RESOLVED_ATR, "yes")); } boolean substitutivelyBondedToRoot = false; - if (inLocants !=null){ + if (inLocants != null) { Element rightMostGroup; - if (multipliedElement.getLocalName().equals(BRACKET_EL)){ + if (multipliedElement.getName().equals(BRACKET_EL)) { rightMostGroup = findRightMostGroupInBracket(multipliedElement); } else{ rightMostGroup = multipliedElement.getFirstChildElement(GROUP_EL); } rightMostGroup.addAttribute(new Attribute(RESOLVED_ATR, "yes"));//this group will not be used further within this word but can in principle be a substituent e.g. methylenedisulfonyl dichloride - if (multipliedGroup.getAttribute(ISAMULTIRADICAL_ATR)!=null){//e.g. methylenedisulfonyl dichloride - if (!multipliedParent.getAttributeValue(INLOCANTS_ATR).equals(INLOCANTS_DEFAULT)){ + if (multipliedGroup.getAttribute(ISAMULTIRADICAL_ATR) != null) {//e.g. methylenedisulfonyl dichloride + if (!multipliedParent.getAttributeValue(INLOCANTS_ATR).equals(INLOCANTS_DEFAULT)) { throw new StructureBuildingException("inLocants should not be specified for a multiradical parent in multiplicative nomenclature"); } } else{ + Atom from = multiRadicalOutAtom.getAtom(); + int bondOrder = multiRadicalOutAtom.getValency(); //bonding will be substitutive rather additive as this is bonding to a root Atom atomToJoinTo = null; for (int j = inLocants.size() -1; j >=0; j--) { String locant = inLocants.get(j); if (locant.equals(INLOCANTS_DEFAULT)){//note that if one entry in inLocantArray is default then they all are "default" - atomToJoinTo = multipliedFrag.getAtomOrNextSuitableAtomOrThrow(multipliedFrag.getDefaultInAtom(), 1, true); + List possibleAtoms = getPossibleAtomsForUnlocantedConnectionToMultipliedRoot(multipliedGroup, bondOrder, i); + if (possibleAtoms.isEmpty()) { + throw new StructureBuildingException("No suitable atom found for multiplicative operation"); + } + if (AmbiguityChecker.isSubstitutionAmbiguous(possibleAtoms, 1)) { + 
state.addIsAmbiguous("Connection to multiplied group: " + multipliedGroup.getValue()); + } + atomToJoinTo = possibleAtoms.get(0); inLocants.remove(j); break; } else{ Atom inAtom = multipliedFrag.getAtomByLocant(locant); - if (inAtom!=null){ + if (inAtom != null) { atomToJoinTo = inAtom; inLocants.remove(j); break; @@ -1168,43 +1717,40 @@ throw new StructureBuildingException("Locants for inAtoms on the root were either misassigned to the root or were invalid: " + inLocants.toString() +" could not be assigned!"); } - OutAtom out = multiRadicalBR.getOutAtom(i); - Atom from = out.getAtom(); - int bondOrder = out.getValency(); - if (!out.isSetExplicitly()){//not set explicitly so may be an inappropriate atom - from=from.getFrag().getAtomOrNextSuitableAtomOrThrow(from, bondOrder, false); + if (!multiRadicalOutAtom.isSetExplicitly()) {//not set explicitly so may be an inappropriate atom + from = findAtomForUnlocantedRadical(state, from.getFrag(), multiRadicalOutAtom); } - multiRadicalFrag.removeOutAtom(out); + multiRadicalFrag.removeOutAtom(multiRadicalOutAtom); state.fragManager.createBond(from, atomToJoinTo, bondOrder); - if (LOG.isTraceEnabled()){LOG.trace("Substitutively bonded (multiplicative to root) " + from.getID() + " (" +state.xmlFragmentMap.getElement(from.getFrag()).getValue()+") " + atomToJoinTo.getID() + " (" +state.xmlFragmentMap.getElement(atomToJoinTo.getFrag()).getValue()+")");} + if (LOG.isTraceEnabled()){LOG.trace("Substitutively bonded (multiplicative to root) " + from.getID() + " (" + from.getFrag().getTokenEl().getValue() + ") " + atomToJoinTo.getID() + " (" + atomToJoinTo.getFrag().getTokenEl().getValue() + ")");} substitutivelyBondedToRoot = true; } } - if (!substitutivelyBondedToRoot){ - joinFragmentsAdditively(state, multiRadicalBR.getOutAtom(i).getAtom().getFrag(), multipliedFrag); + if (!substitutivelyBondedToRoot) { + joinFragmentsAdditively(state, multiRadicalFrag, multipliedFrag); } - if 
(multipliedElement.getLocalName().equals(BRACKET_EL)){ + if (multipliedElement.getName().equals(BRACKET_EL)) { recursivelyResolveUnLocantedFeatures(state, multipliedElement);//there may be outAtoms that are involved in unlocanted substitution, these can be safely used now e.g. ...bis((3-hydroxy-4-methoxyphenyl)methylene) where (3-hydroxy-4-methoxyphenyl)methylene is the currentElement } - if (inLocants ==null){ + if (inLocants == null) { //currentElement is not a root element. Need to build up a new BuildResults so as to call performMultiplicativeOperations again //at this stage an outAtom has been removed from the fragment within currentElement through an additive bond - newBr.mergeBuildResults(new BuildResults(state, multipliedElement)); + newBr.mergeBuildResults(new BuildResults(multipliedElement)); } } - if (newBr.getFragmentCount()==1){ + if (newBr.getFragmentCount() == 1) { throw new StructureBuildingException("Multiplicative nomenclature cannot yield only one temporary terminal fragment"); } - if (newBr.getFragmentCount()>=2){ - List siblings = XOMTools.getNextSiblingsOfTypes(multipliedParent, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL}); - if (siblings.size()==0){ - Element parentOfMultipliedEl = (Element) multipliedParent.getParent(); - if (parentOfMultipliedEl.getLocalName().equals(BRACKET_EL)){//brackets are allowed - siblings = XOMTools.getNextSiblingsOfTypes(parentOfMultipliedEl, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL}); - if (siblings.get(0).getAttribute(MULTIPLIER_ATR)==null){ + if (newBr.getFragmentCount() >= 2) { + List siblings = OpsinTools.getNextSiblingsOfTypes(multipliedParent, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL}); + if (siblings.size() == 0) { + Element parentOfMultipliedEl = multipliedParent.getParent(); + if (parentOfMultipliedEl.getName().equals(BRACKET_EL)) {//brackets are allowed + siblings = OpsinTools.getNextSiblingsOfTypes(parentOfMultipliedEl, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL}); + if 
(siblings.get(0).getAttribute(MULTIPLIER_ATR) == null) { throw new StructureBuildingException("Multiplier not found where multiplier was expected for succesful multiplicative nomenclature"); } performMultiplicativeOperations(state, newBr, siblings.get(0)); @@ -1214,7 +1760,7 @@ } } else{ - if (siblings.get(0).getAttribute(MULTIPLIER_ATR)==null){ + if (siblings.get(0).getAttribute(MULTIPLIER_ATR) == null) { throw new StructureBuildingException("Multiplier not found where multiplier was expected for successful multiplicative nomenclature"); } performMultiplicativeOperations(state, newBr, siblings.get(0)); @@ -1222,41 +1768,88 @@ } for (Element clone : clonedElements) {//only insert cloned substituents now so they don't substitute onto each other! - XOMTools.insertAfter(multipliedParent, clone); + OpsinTools.insertAfter(multipliedParent, clone); } } /** + * Applies special case to prefer the end of chains with the usableAsAJoiner attributes cf. p-phenylenedipropionic acid + * Such cases will still be considered to be formally ambiguous + * @param multipliedGroup + * @param multipliedFrag + * @param bondOrder + * @param primesAdded + * @return + * @throws StructureBuildingException + */ + private static List getPossibleAtomsForUnlocantedConnectionToMultipliedRoot(Element multipliedGroup, int bondOrder, int primesAdded) throws StructureBuildingException { + Fragment multipliedFrag = multipliedGroup.getFrag(); + if ("yes".equals(multipliedGroup.getAttributeValue(USABLEASJOINER_ATR)) && multipliedFrag.getDefaultInAtom() == null) { + Element previous = OpsinTools.getPrevious(multipliedGroup); + if (previous != null && previous.getName().equals(MULTIPLIER_EL)){ + String locant = getLocantOfEndOfChainIfGreaterThan1(multipliedFrag, primesAdded); + if (locant != null) { + Atom preferredAtom = multipliedFrag.getAtomByLocantOrThrow(locant); + List possibleAtoms = FragmentTools.findnAtomsForSubstitution(multipliedFrag.getAtomList(), preferredAtom, 1, bondOrder, true); + if 
(possibleAtoms == null) { + possibleAtoms = Collections.emptyList(); + } + return possibleAtoms; + } + } + } + return FragmentTools.findSubstituableAtoms(multipliedFrag, bondOrder); + } + + private static String getLocantOfEndOfChainIfGreaterThan1(Fragment frag, int primes) { + String primesStr = StringTools.multiplyString("'", primes); + int length = 0; + Atom next = frag.getAtomByLocant(Integer.toString(length + 1) + primesStr); + Atom previous = null; + while (next != null){ + if (previous != null && previous.getBondToAtom(next) == null){ + break; + } + length++; + previous = next; + next = frag.getAtomByLocant(Integer.toString(length + 1) + primesStr); + } + if (length > 1){ + return Integer.toString(length) + primesStr; + } + return null; + } + + /** * Given a subsituent/bracket finds the next multi valent substituent/root that is in scope and hence its group * e.g. for oxy(dichloromethyl)methylene given oxy substituent the methylene group would be found * for oxy(dichloroethylene) given oxy substituent the ethylene group would be found * for oxy(carbonylimino) given oxy carbonyl would be found - * @param state * @param substituentOrBracket * @return frag * @throws StructureBuildingException */ - private static Fragment getNextInScopeMultiValentFragment(BuildState state, Element substituentOrBracket) throws StructureBuildingException { - if (!substituentOrBracket.getLocalName().equals(SUBSTITUENT_EL) && !substituentOrBracket.getLocalName().equals(BRACKET_EL)){ + private static Fragment getNextInScopeMultiValentFragment(Element substituentOrBracket) throws StructureBuildingException { + if (!substituentOrBracket.getName().equals(SUBSTITUENT_EL) && !substituentOrBracket.getName().equals(BRACKET_EL)){ throw new StructureBuildingException("Input to this function should be a substituent or bracket"); } if (substituentOrBracket.getParent()==null){ throw new StructureBuildingException("substituent did not have a parent!"); } - Element parent =(Element) 
substituentOrBracket.getParent(); + Element parent = substituentOrBracket.getParent(); - List children = XOMTools.getChildElementsWithTagNames(parent, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL});//will be returned in index order + List children = OpsinTools.getChildElementsWithTagNames(parent, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL});//will be returned in index order int indexOfSubstituent =parent.indexOf(substituentOrBracket); for (Element child : children) { if (parent.indexOf(child) <=indexOfSubstituent){//only want things after the input continue; } - if (child.getAttribute(MULTIPLIER_ATR)!=null){ + if (child.getAttribute(MULTIPLIER_ATR) != null){ continue; } List childDescendants; - if (child.getLocalName().equals(BRACKET_EL)){ - childDescendants = XOMTools.getDescendantElementsWithTagNames(child, new String[]{SUBSTITUENT_EL, ROOT_EL});//will be returned in depth-first order + if (child.getName().equals(BRACKET_EL)){ + childDescendants = OpsinTools.getDescendantElementsWithTagNames(child, new String[]{SUBSTITUENT_EL, ROOT_EL});//will be returned in depth-first order } else{ childDescendants =new ArrayList(); @@ -1267,9 +1860,9 @@ if (group == null){ throw new StructureBuildingException("substituent/root is missing its group"); } - Fragment possibleFrag = state.xmlFragmentMap.get(group); - if (group.getAttribute(ISAMULTIRADICAL_ATR)!=null && - (possibleFrag.getOutAtomCount() >=2 || (possibleFrag.getOutAtomCount() >=1 && group.getAttribute(RESOLVED_ATR)!=null ))){ + Fragment possibleFrag = group.getFrag(); + if (group.getAttribute(ISAMULTIRADICAL_ATR) != null && + (possibleFrag.getOutAtomCount() >=2 || (possibleFrag.getOutAtomCount() >=1 && group.getAttribute(RESOLVED_ATR) != null ))){ return possibleFrag; } } @@ -1279,21 +1872,20 @@ /** * Given a bracket searches in a depth first manner for the first multi valent group - * @param state * @param bracket * @return group * @throws StructureBuildingException */ - private static Element 
getFirstMultiValentGroup(BuildState state, Element bracket) throws StructureBuildingException { - if (!bracket.getLocalName().equals(BRACKET_EL)){ + private static Element getFirstMultiValentGroup(Element bracket) throws StructureBuildingException { + if (!bracket.getName().equals(BRACKET_EL)){ throw new StructureBuildingException("Input to this function should be a bracket"); } - List groups = XOMTools.getDescendantElementsWithTagName(bracket, GROUP_EL);//will be returned in index order + List groups = OpsinTools.getDescendantElementsWithTagName(bracket, GROUP_EL);//will be returned in index order for (Element group : groups) { - Fragment possibleFrag = state.xmlFragmentMap.get(group); - if (group.getAttribute(ISAMULTIRADICAL_ATR)!=null && - (possibleFrag.getOutAtomCount() >=2 || (possibleFrag.getOutAtomCount() >=1 && group.getAttribute(RESOLVED_ATR)!=null ))){ + Fragment possibleFrag = group.getFrag(); + if (group.getAttribute(ISAMULTIRADICAL_ATR) != null && + (possibleFrag.getOutAtomCount() >=2 || (possibleFrag.getOutAtomCount() >=1 && group.getAttribute(RESOLVED_ATR) != null ))){ return group; } } @@ -1301,11 +1893,11 @@ } private static void joinFragmentsAdditively(BuildState state, Fragment fragToBeJoined, Fragment parentFrag) throws StructureBuildingException { - Element elOfFragToBeJoined = state.xmlFragmentMap.getElement(fragToBeJoined); + Element elOfFragToBeJoined = fragToBeJoined.getTokenEl(); if (EPOXYLIKE_SUBTYPE_VAL.equals(elOfFragToBeJoined.getAttributeValue(SUBTYPE_ATR))){ for (int i = 0, l = fragToBeJoined.getOutAtomCount(); i < l; i++) { OutAtom outAtom = fragToBeJoined.getOutAtom(i); - if (outAtom.getLocant()!=null){ + if (outAtom.getLocant() != null){ throw new StructureBuildingException("Inappropriate use of " + elOfFragToBeJoined.getValue()); } } @@ -1324,7 +1916,7 @@ boolean unresolvedAmbiguity =false; for (int i = 1, l = parentFrag.getOutAtomCount(); i < l; i++) { OutAtom outAtom = parentFrag.getOutAtom(i); - if 
(outAtom.getValency()!=firstOutAtomOrder){ + if (outAtom.getValency() != firstOutAtomOrder){ unresolvedAmbiguity =true; } } @@ -1333,29 +1925,29 @@ unresolvedAmbiguity =false; for (int i = 1, l = fragToBeJoined.getOutAtomCount(); i < l; i++) { OutAtom outAtom = fragToBeJoined.getOutAtom(i); - if (outAtom.getValency()!=firstOutAtomOrder){ + if (outAtom.getValency() != firstOutAtomOrder){ unresolvedAmbiguity =true; } } if (unresolvedAmbiguity && outAtomCountOnFragToBeJoined == 2){//not all outAtoms on frag to be joined are equivalent either! //Solves the specific case of 2,2'-[ethane-1,2-diylbis(azanylylidenemethanylylidene)]diphenol vs 2,2'-[ethane-1,2-diylidenebis(azanylylidenemethanylylidene)]bis(cyclohexan-1-ol) //but does not solve the general case as only a single look behind is performed. - Element previousGroup = (Element) OpsinTools.getPreviousGroup(elOfFragToBeJoined); - if (previousGroup!=null){ - Fragment previousFrag = state.xmlFragmentMap.get(previousGroup); + Element previousGroup = OpsinTools.getPreviousGroup(elOfFragToBeJoined); + if (previousGroup != null){ + Fragment previousFrag = previousGroup.getFrag(); if (previousFrag.getOutAtomCount() > 1){ int previousGroupFirstOutAtomOrder = previousFrag.getOutAtom(0).getValency(); unresolvedAmbiguity =false; for (int i = 1, l = previousFrag.getOutAtomCount(); i < l; i++) { OutAtom outAtom = previousFrag.getOutAtom(i); - if (outAtom.getValency()!=previousGroupFirstOutAtomOrder){ + if (outAtom.getValency() != previousGroupFirstOutAtomOrder){ unresolvedAmbiguity =true; } } if (!unresolvedAmbiguity && previousGroupFirstOutAtomOrder==parentFrag.getOutAtom(0).getValency()){ for (int i = 1, l = parentFrag.getOutAtomCount(); i < l; i++) { OutAtom outAtom = parentFrag.getOutAtom(i); - if (outAtom.getValency()!=previousGroupFirstOutAtomOrder){ + if (outAtom.getValency() != previousGroupFirstOutAtomOrder){ in = outAtom; break; } @@ -1381,7 +1973,7 @@ Atom to = in.getAtom(); int bondOrder = in.getValency(); if 
(!in.isSetExplicitly()){//not set explicitly so may be an inappropriate atom - to = to.getFrag().getAtomOrNextSuitableAtomOrThrow(to, bondOrder, false); + to = findAtomForUnlocantedRadical(state, to.getFrag(), in); } parentFrag.removeOutAtom(in); @@ -1400,7 +1992,7 @@ Atom lastOutAtom = fragToBeJoined.getOutAtom(outAtomCountOnFragToBeJoined -1).getAtom(); for (int i =outAtomCountOnFragToBeJoined -1; i >= 0; i--) { OutAtom nextOutAtom = fragToBeJoined.getOutAtom(i); - if (nextOutAtom.getAtom() !=lastOutAtom){ + if (nextOutAtom.getAtom() != lastOutAtom){ throw new StructureBuildingException("Additive bond formation failure: bond order disagreement"); } valency += nextOutAtom.getValency(); @@ -1422,16 +2014,16 @@ Atom from = out.getAtom(); if (!out.isSetExplicitly()){//not set explicitly so may be an inappropriate atom - from=from.getFrag().getAtomOrNextSuitableAtomOrThrow(from, bondOrder, false); + from = findAtomForUnlocantedRadical(state, from.getFrag(), out); } fragToBeJoined.removeOutAtom(out); state.fragManager.createBond(from, to, bondOrder); - if (LOG.isTraceEnabled()){LOG.trace("Additively bonded " + from.getID() + " (" +state.xmlFragmentMap.getElement(from.getFrag()).getValue()+") " + to.getID() + " (" +state.xmlFragmentMap.getElement(to.getFrag()).getValue()+")" );} + if (LOG.isTraceEnabled()){LOG.trace("Additively bonded " + from.getID() + " (" + from.getFrag().getTokenEl().getValue() + ") " + to.getID() + " (" + to.getFrag().getTokenEl().getValue() + ")" );} } private static void joinFragmentsSubstitutively(BuildState state, Fragment fragToBeJoined, Atom atomToJoinTo) throws StructureBuildingException { - Element elOfFragToBeJoined = state.xmlFragmentMap.getElement(fragToBeJoined); + Element elOfFragToBeJoined = fragToBeJoined.getTokenEl(); if (EPOXYLIKE_SUBTYPE_VAL.equals(elOfFragToBeJoined.getAttributeValue(SUBTYPE_ATR))){ formEpoxide(state, fragToBeJoined, atomToJoinTo); return; @@ -1443,7 +2035,7 @@ if (outAtomCount ==0 ){ throw new 
StructureBuildingException("Substitutive bond formation failure: Fragment expected to have one OutAtom but had none"); } - if (state.xmlFragmentMap.getElement(fragToBeJoined).getAttribute(IMINOLIKE_ATR)!=null){//special case for methylene/imino + if (elOfFragToBeJoined.getAttribute(IMINOLIKE_ATR) != null){//special case for methylene/imino if (fragToBeJoined.getOutAtomCount()==1 && fragToBeJoined.getOutAtom(0).getValency()==1 ){ fragToBeJoined.getOutAtom(0).setValency(2); } @@ -1452,12 +2044,21 @@ Atom from = out.getAtom(); int bondOrder = out.getValency(); if (!out.isSetExplicitly()){//not set explicitly so may be an inappropriate atom - from=from.getFrag().getAtomOrNextSuitableAtomOrThrow(from, bondOrder, false); + List possibleAtoms = FragmentTools.findnAtomsForSubstitution(fragToBeJoined.getAtomList(), from, 1, bondOrder, false); + if (possibleAtoms == null){ + throw new StructureBuildingException("Failed to assign all unlocanted radicals to actual atoms without violating valency"); + } + if (!((ALKANESTEM_SUBTYPE_VAL.equals(fragToBeJoined.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(fragToBeJoined.getSubType())) && possibleAtoms.get(0).equals(fragToBeJoined.getFirstAtom()))) { + if (AmbiguityChecker.isSubstitutionAmbiguous(possibleAtoms, 1)) { + state.addIsAmbiguous("Positioning of radical on: " + fragToBeJoined.getTokenEl().getValue()); + } + } + from = possibleAtoms.get(0); } fragToBeJoined.removeOutAtom(out); state.fragManager.createBond(from, atomToJoinTo, bondOrder); - if (LOG.isTraceEnabled()){LOG.trace("Substitutively bonded " + from.getID() + " (" +state.xmlFragmentMap.getElement(from.getFrag()).getValue()+") " + atomToJoinTo.getID() + " (" +state.xmlFragmentMap.getElement(atomToJoinTo.getFrag()).getValue()+")");} + if (LOG.isTraceEnabled()){LOG.trace("Substitutively bonded " + from.getID() + " (" + from.getFrag().getTokenEl().getValue() + ") " + atomToJoinTo.getID() + " (" + atomToJoinTo.getFrag().getTokenEl().getValue() + ")");} } /** @@ -1467,16 
+2068,17 @@ * @param state * @param bridgingFragment * @param atomToJoinTo + * @return Atoms that the bridgingFragment attached to * @throws StructureBuildingException */ - static void formEpoxide(BuildState state, Fragment bridgingFragment, Atom atomToJoinTo) throws StructureBuildingException { + static Atom[] formEpoxide(BuildState state, Fragment bridgingFragment, Atom atomToJoinTo) throws StructureBuildingException { Fragment fragToJoinTo = atomToJoinTo.getFrag(); List atomList = fragToJoinTo.getAtomList(); if (atomList.size()==1){ throw new StructureBuildingException("Epoxides must be formed between two different atoms"); } Atom firstAtomToJoinTo; - if (bridgingFragment.getOutAtom(0).getLocant()!=null){ + if (bridgingFragment.getOutAtom(0).getLocant() != null){ firstAtomToJoinTo = fragToJoinTo.getAtomByLocantOrThrow(bridgingFragment.getOutAtom(0).getLocant()); } else{ @@ -1484,72 +2086,119 @@ } Atom chalcogenAtom1 = bridgingFragment.getOutAtom(0).getAtom(); bridgingFragment.removeOutAtom(0); + + //In epoxy chalcogenAtom1 will be chalcogenAtom2. Methylenedioxy is also handled by this method + state.fragManager.createBond(chalcogenAtom1, firstAtomToJoinTo, 1); + Atom secondAtomToJoinTo; - if (bridgingFragment.getOutAtom(0).getLocant()!=null){ + if (bridgingFragment.getOutAtom(0).getLocant() != null){ secondAtomToJoinTo = fragToJoinTo.getAtomByLocantOrThrow(bridgingFragment.getOutAtom(0).getLocant()); } else{ int index = atomList.indexOf(firstAtomToJoinTo); - if (index +1 >= atomList.size()){ - secondAtomToJoinTo = fragToJoinTo.getAtomOrNextSuitableAtomOrThrow(atomList.get(index-1), 1, true); + Atom preferredAtom = (index + 1 >= atomList.size()) ? 
atomList.get(index - 1) : atomList.get(index + 1); + List possibleSecondAtom = FragmentTools.findnAtomsForSubstitution(fragToJoinTo.getAtomList(), preferredAtom, 1, 1, true); + if (possibleSecondAtom != null) { + possibleSecondAtom.removeAll(Collections.singleton(firstAtomToJoinTo)); } - else{ - secondAtomToJoinTo = fragToJoinTo.getAtomOrNextSuitableAtomOrThrow(atomList.get(index+1), 1, true); + if (possibleSecondAtom == null || possibleSecondAtom.size() == 0) { + throw new StructureBuildingException("Unable to find suitable atom to form bridge"); } + if (AmbiguityChecker.isSubstitutionAmbiguous(possibleSecondAtom, 1)) { + state.addIsAmbiguous("Addition of bridge to: "+ fragToJoinTo.getTokenEl().getValue()); + } + secondAtomToJoinTo = possibleSecondAtom.get(0); } Atom chalcogenAtom2 = bridgingFragment.getOutAtom(0).getAtom(); bridgingFragment.removeOutAtom(0); if (chalcogenAtom1.equals(chalcogenAtom2) && firstAtomToJoinTo == secondAtomToJoinTo){ throw new StructureBuildingException("Epoxides must be formed between two different atoms"); } - //In epoxy chalcogenAtom1 will be chalcogenAtom2. 
Methylenedioxy is also handled by this method - state.fragManager.createBond(chalcogenAtom1, firstAtomToJoinTo, 1); state.fragManager.createBond(chalcogenAtom2, secondAtomToJoinTo, 1); CycleDetector.assignWhetherAtomsAreInCycles(bridgingFragment); + return new Atom[]{firstAtomToJoinTo, secondAtomToJoinTo}; } - - private static Atom findAtomForSubstitution(BuildState state, Element subOrBracket, int bondOrder) { - Atom to =null; - List possibleParents =findAlternativeFragments(state, subOrBracket); - for (Fragment fragment : possibleParents) { - to = fragment.getAtomOrNextSuitableAtom(fragment.getDefaultInAtom(), bondOrder, true); - if (to !=null){ - break; + + /** + * Attempts to find an in-scope fragment capable of forming the given numberOfSubstitutions each with the given bondOrder + * @param subOrBracket + * @param numberOfSubstitutions + * @param bondOrder + * @return + */ + private static List findAtomsForSubstitution(Element subOrBracket, int numberOfSubstitutions, int bondOrder) { + boolean rootHandled = false; + List possibleParents = findAlternativeGroups(subOrBracket); + for (int i = 0, l = possibleParents.size(); i < l; i++) { + Element possibleParent = possibleParents.get(i); + Fragment frag = possibleParent.getFrag(); + List substitutableAtoms; + if (possibleParent.getParent().getName().equals(ROOT_EL)){//consider all root groups as if they were one + if(rootHandled) { + continue; + } + List atoms = frag.getAtomList(); + for (int j = i + 1; j < l; j++) { + Element possibleOtherRoot = possibleParents.get(j); + if (possibleOtherRoot.getParent().getName().equals(ROOT_EL)) { + atoms.addAll(possibleOtherRoot.getFrag().getAtomList()); + } + } + rootHandled = true; + substitutableAtoms = FragmentTools.findnAtomsForSubstitution(atoms, frag.getDefaultInAtom(), numberOfSubstitutions, bondOrder, true); + } + else{ + substitutableAtoms = FragmentTools.findnAtomsForSubstitution(frag, numberOfSubstitutions, bondOrder); + } + if (substitutableAtoms != null){ + 
return substitutableAtoms; } } - return to; + return null; } /** - * Finds all the groups accessible from the startingElement taking into account brackets + * Finds all the fragments accessible from the startingElement taking into account brackets * i.e. those that it is feasible that the group of the startingElement could substitute onto - * @param state * @param startingElement * @return A list of fragments in the order to try them as possible parent fragments (for substitutive operations) */ - static List findAlternativeFragments(BuildState state, Element startingElement) { - Stack stack = new Stack(); - stack.add((Element) startingElement.getParent()); - List foundFragments =new ArrayList(); - boolean doneFirstIteration =false;//check on index only done on first iteration to only get elements with an index greater than the starting element - while (stack.size()>0){ - Element currentElement =stack.pop(); - if (currentElement.getLocalName().equals(GROUP_EL)){ - Fragment groupFrag =state.xmlFragmentMap.get(currentElement); - foundFragments.add(groupFrag); + static List findAlternativeFragments(Element startingElement) { + List foundFragments = new ArrayList(); + for (Element group : findAlternativeGroups(startingElement)) { + foundFragments.add(group.getFrag()); + } + return foundFragments; + } + + /** + * Finds all the groups accessible from the startingElement taking into account brackets + * i.e. 
those that it is feasible that the group of the startingElement could substitute onto + * @param startingElement + * @return A list of groups in the order to try them as possible parent groups (for substitutive operations) + */ + static List findAlternativeGroups(Element startingElement) { + Deque stack = new ArrayDeque(); + stack.add(startingElement.getParent()); + List foundGroups = new ArrayList(); + boolean doneFirstIteration = false;//check on index only done on first iteration to only get elements with an index greater than the starting element + while (stack.size() > 0) { + Element currentElement =stack.removeLast(); + if (currentElement.getName().equals(GROUP_EL)) { + foundGroups.add(currentElement); continue; } - List siblings = XOMTools.getChildElementsWithTagNames(currentElement, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); + List siblings = OpsinTools.getChildElementsWithTagNames(currentElement, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); - Stack bracketted = new Stack(); + List bracketted = new ArrayList(); for (Element bracketOrSubOrRoot : siblings) { - if (!doneFirstIteration && currentElement.indexOf(bracketOrSubOrRoot)<=currentElement.indexOf(startingElement)){ + if (!doneFirstIteration && currentElement.indexOf(bracketOrSubOrRoot) <= currentElement.indexOf(startingElement)){ continue; } - if (bracketOrSubOrRoot.getAttribute(MULTIPLIER_ATR)!=null){ + if (bracketOrSubOrRoot.getAttribute(MULTIPLIER_ATR) != null){ continue; } - if (bracketOrSubOrRoot.getLocalName().equals(BRACKET_EL)){ + if (bracketOrSubOrRoot.getName().equals(BRACKET_EL)){ if (IMPLICIT_TYPE_VAL.equals(bracketOrSubOrRoot.getAttributeValue(TYPE_ATR))){ stack.add(bracketOrSubOrRoot); } @@ -1562,35 +2211,37 @@ stack.add(group); } } - stack.addAll(0, bracketted);//locanting into brackets is rarely the desired answer so place at the bottom of the stack - doneFirstIteration =true; + //locanting into brackets is rarely the desired answer so place at the bottom of the stack + 
for (int i = bracketted.size() -1; i >=0; i--) { + stack.addFirst(bracketted.get(i)); + } + doneFirstIteration = true; } - return foundFragments; + return foundGroups; } /** * Checks through the groups accessible from the currentElement taking into account brackets * i.e. those that it is feasible that the group of the currentElement could substitute onto - * @param state * @param startingElement * @param locant: the locant string to check for the presence of * @return The fragment with the locant, or null * @throws StructureBuildingException */ - private static Fragment findFragmentWithLocant(BuildState state, Element startingElement, String locant) throws StructureBuildingException { - Stack stack = new Stack(); - stack.add((Element) startingElement.getParent()); + private static Fragment findFragmentWithLocant(Element startingElement, String locant) throws StructureBuildingException { + Deque stack = new ArrayDeque(); + stack.add(startingElement.getParent()); boolean doneFirstIteration =false;//check on index only done on first iteration to only get elements with an index greater than the starting element Fragment monoNuclearHydride =null;//e.g. methyl/methane - In this case no locant would be expected as unlocanted substitution is always unambiguous. Hence deprioritise while (stack.size()>0){ - Element currentElement =stack.pop(); - if (currentElement.getLocalName().equals(SUBSTITUENT_EL)|| currentElement.getLocalName().equals(ROOT_EL)){ - Fragment groupFrag =state.xmlFragmentMap.get(currentElement.getFirstChildElement(GROUP_EL)); - if (monoNuclearHydride!=null && currentElement.getAttribute(LOCANT_ATR)!=null){//It looks like all groups are locanting onto the monoNuclearHydride e.g. 
1-oxo-1-phenyl-sulfanylidene + Element currentElement =stack.removeLast(); + if (currentElement.getName().equals(SUBSTITUENT_EL)|| currentElement.getName().equals(ROOT_EL)){ + Fragment groupFrag = currentElement.getFirstChildElement(GROUP_EL).getFrag(); + if (monoNuclearHydride != null && currentElement.getAttribute(LOCANT_ATR) != null){//It looks like all groups are locanting onto the monoNuclearHydride e.g. 1-oxo-1-phenyl-sulfanylidene return monoNuclearHydride; } if (groupFrag.hasLocant(locant)){ - if (locant.equals("1") && groupFrag.getAtomList().size()==1){ + if (locant.equals("1") && groupFrag.getAtomCount()==1){ if (monoNuclearHydride ==null){ monoNuclearHydride= groupFrag; } @@ -1601,12 +2252,12 @@ } continue; } - else if (monoNuclearHydride!=null){ + else if (monoNuclearHydride != null){ return monoNuclearHydride; } - List siblings = XOMTools.getChildElementsWithTagNames(currentElement, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); + List siblings = OpsinTools.getChildElementsWithTagNames(currentElement, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); - Stack bracketted = new Stack(); + List bracketted = new ArrayList(); if (!doneFirstIteration){//on the first iteration, ignore elements before the starting element and favour the element directly after the starting element (conditions apply) int indexOfStartingEl = currentElement.indexOf(startingElement); Element substituentToTryFirst =null; @@ -1615,10 +2266,10 @@ if (indexOfCurrentEl <= indexOfStartingEl){ continue; } - if (bracketOrSubOrRoot.getAttribute(MULTIPLIER_ATR)!=null){ + if (bracketOrSubOrRoot.getAttribute(MULTIPLIER_ATR) != null){ continue; } - if (bracketOrSubOrRoot.getLocalName().equals(BRACKET_EL)){ + if (bracketOrSubOrRoot.getName().equals(BRACKET_EL)){ if (IMPLICIT_TYPE_VAL.equals(bracketOrSubOrRoot.getAttributeValue(TYPE_ATR))){ stack.add(bracketOrSubOrRoot); } @@ -1635,17 +2286,17 @@ } } } - if (substituentToTryFirst !=null){ + if (substituentToTryFirst != null){ 
stack.add(substituentToTryFirst); } doneFirstIteration =true; } else { for (Element bracketOrSubOrRoot : siblings) { - if (bracketOrSubOrRoot.getAttribute(MULTIPLIER_ATR)!=null){ + if (bracketOrSubOrRoot.getAttribute(MULTIPLIER_ATR) != null){ continue; } - if (bracketOrSubOrRoot.getLocalName().equals(BRACKET_EL)){ + if (bracketOrSubOrRoot.getName().equals(BRACKET_EL)){ if (IMPLICIT_TYPE_VAL.equals(bracketOrSubOrRoot.getAttributeValue(TYPE_ATR))){ stack.add(bracketOrSubOrRoot); } @@ -1658,24 +2309,48 @@ } } } - stack.addAll(0, bracketted);//locanting into brackets is rarely the desired answer so place at the bottom of the stack + //locanting into brackets is rarely the desired answer so place at the bottom of the stack + for (int i = bracketted.size() -1; i >=0; i--) { + stack.addFirst(bracketted.get(i)); + } } return monoNuclearHydride; } static Element findRightMostGroupInBracket(Element bracket) { - List subsBracketsAndRoots = XOMTools.getChildElementsWithTagNames(bracket, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); - while (subsBracketsAndRoots.get(subsBracketsAndRoots.size()-1).getLocalName().equals(BRACKET_EL)){ - subsBracketsAndRoots = XOMTools.getChildElementsWithTagNames(subsBracketsAndRoots.get(subsBracketsAndRoots.size()-1), new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); + List subsBracketsAndRoots = OpsinTools.getChildElementsWithTagNames(bracket, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); + Element lastSubsBracketOrRoot = subsBracketsAndRoots.get(subsBracketsAndRoots.size() - 1); + while (lastSubsBracketOrRoot.getName().equals(BRACKET_EL)) { + subsBracketsAndRoots = OpsinTools.getChildElementsWithTagNames(lastSubsBracketOrRoot, new String[]{BRACKET_EL, SUBSTITUENT_EL, ROOT_EL}); + lastSubsBracketOrRoot = subsBracketsAndRoots.get(subsBracketsAndRoots.size() - 1); + } + return findRightMostGroupInSubOrRoot(lastSubsBracketOrRoot); + } + + static Element findRightMostGroupInSubBracketOrRoot(Element subBracketOrRoot) { + if 
(subBracketOrRoot.getName().equals(BRACKET_EL)) { + return findRightMostGroupInBracket(subBracketOrRoot); + } + else { + return findRightMostGroupInSubOrRoot(subBracketOrRoot); } - return subsBracketsAndRoots.get(subsBracketsAndRoots.size()-1).getFirstChildElement(GROUP_EL); + } + + private static Element findRightMostGroupInSubOrRoot(Element subOrRoot) { + for (int i = subOrRoot.getChildCount() - 1; i >= 0; i--) { + Element el = subOrRoot.getChild(i); + if (el.getName().equals(GROUP_EL)) { + return el; + } + } + return null; } private static boolean potentiallyCanSubstitute(Element subBracketOrRoot) { - Element parent =(Element) subBracketOrRoot.getParent(); - Elements children =parent.getChildElements(); + Element parent = subBracketOrRoot.getParent(); + List children =parent.getChildElements(); for (int i = parent.indexOf(subBracketOrRoot) +1 ; i < children.size(); i++) { - if (!children.get(i).getLocalName().equals(HYPHEN_EL)){ + if (!children.get(i).getName().equals(HYPHEN_EL)){ return true; } } @@ -1686,12 +2361,12 @@ int terminalPrimes = StringTools.countTerminalPrimes(locantString); if (terminalPrimes > 0){ int brackettingDepth = 0; - Element parent = (Element) subBracketOrRoot.getParent(); - while (parent !=null && parent.getLocalName().equals(BRACKET_EL)){ + Element parent = subBracketOrRoot.getParent(); + while (parent != null && parent.getName().equals(BRACKET_EL)){ if (!IMPLICIT_TYPE_VAL.equals(parent.getAttributeValue(TYPE_ATR))){ brackettingDepth++; } - parent = (Element) parent.getParent(); + parent = parent.getParent(); } if (terminalPrimes == brackettingDepth){ return locantString.substring(0, locantString.length() - terminalPrimes); @@ -1710,7 +2385,7 @@ */ private static void checkAndApplySpecialCaseWhereOutAtomsCanBeCombinedOrThrow(Fragment frag, Element group) throws StructureBuildingException { int outAtomCount = frag.getOutAtomCount(); - if (outAtomCount<=1){ + if (outAtomCount <= 1) { return; } if 
(EPOXYLIKE_SUBTYPE_VAL.equals(group.getAttributeValue(SUBTYPE_ATR))){ @@ -1720,18 +2395,18 @@ if (groupValue.equals("oxy") || groupValue.equals("thio") || groupValue.equals("seleno") || groupValue.equals("telluro")){//always bivalent return; } - //special case- all outAtoms on same atom e.g. methylenecyclohexane + //special case: all outAtoms on same atom e.g. methylenecyclohexane Atom firstOutAtom = frag.getOutAtom(0).getAtom(); - int valencyOfOutAtom =0; - for (int i = outAtomCount -1; i >=0 ; i--) {//remove all outAtoms and add one with the total valency of all those that have been removed + int valencyOfOutAtom = 0; + for (int i = outAtomCount - 1; i >=0 ; i--) {//remove all outAtoms and add one with the total valency of all those that have been removed OutAtom out = frag.getOutAtom(i); - if (out.getAtom() !=firstOutAtom){ + if (!out.getAtom().equals(firstOutAtom)){ throw new StructureBuildingException("Substitutive bond formation failure: Fragment expected to have one OutAtom but had: "+ outAtomCount); } - valencyOfOutAtom +=out.getValency(); + valencyOfOutAtom += out.getValency(); frag.removeOutAtom(i); } - frag.addOutAtom(frag.getFirstAtom(), valencyOfOutAtom, true); + frag.addOutAtom(firstOutAtom, valencyOfOutAtom, true); } /** @@ -1744,8 +2419,9 @@ */ static int calculateSubstitutableHydrogenAtoms(Atom atom) { int valency = atom.determineValency(true); - int currentValency =atom.getIncomingValency() + atom.getOutValency(); - return valency-currentValency; + int currentValency = atom.getIncomingValency() + atom.getOutValency(); + int substitutableHydrogen = valency - currentValency; + return substitutableHydrogen >= 0 ? 
substitutableHydrogen : 0; } /** @@ -1756,9 +2432,9 @@ * @param primesString */ private static void addPrimesToLocantedStereochemistryElements(Element subOrBracket, String primesString) { - List stereoChemistryElements =XOMTools.getDescendantElementsWithTagName(subOrBracket, STEREOCHEMISTRY_EL); + List stereoChemistryElements =OpsinTools.getDescendantElementsWithTagName(subOrBracket, STEREOCHEMISTRY_EL); for (Element stereoChemistryElement : stereoChemistryElements) { - if (stereoChemistryElement.getAttribute(LOCANT_ATR)!=null){ + if (stereoChemistryElement.getAttribute(LOCANT_ATR) != null){ stereoChemistryElement.getAttribute(LOCANT_ATR).setValue(stereoChemistryElement.getAttributeValue(LOCANT_ATR) + primesString); } } @@ -1772,9 +2448,9 @@ */ private static Integer levelsToWordEl(Element element) { int count =0; - while (!element.getLocalName().equals(WORD_EL)){ - element =(Element) element.getParent(); - if (element==null){ + while (!element.getName().equals(WORD_EL)){ + element = element.getParent(); + if (element == null){ return null; } count++; diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixApplier.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixApplier.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixApplier.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixApplier.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,619 @@ +package uk.ac.cam.ch.wwmm.opsin; + +import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import uk.ac.cam.ch.wwmm.opsin.IsotopeSpecificationParser.IsotopeSpecification; + + +class SuffixApplier { + + private final BuildState state; + private final SuffixRules suffixRules; + + 
SuffixApplier(BuildState state, SuffixRules suffixRules) { + this.state = state; + this.suffixRules = suffixRules; + } + + /** + * Does suffixApplicability.xml have an entry for this group type? + * @param groupType + * @return + */ + boolean isGroupTypeWithSpecificSuffixRules(String groupType){ + return suffixRules.isGroupTypeWithSpecificSuffixRules(groupType); + } + + + /**Process the effects of suffixes upon a fragment. + * Unlocanted non-terminal suffixes are not attached yet. All other suffix effects are performed + * @param group The group element for the fragment to which the suffixes will be added + * @param suffixes The suffix elements for a fragment. + * @throws StructureBuildingException If the suffixes can't be resolved properly. + * @throws ComponentGenerationException + */ + void resolveSuffixes(Element group, List suffixes) throws StructureBuildingException, ComponentGenerationException { + Fragment frag = group.getFrag(); + List atomList = frag.getAtomList();//this instance of atomList will not change even once suffixes are merged into the fragment + String groupType = frag.getType(); + String subgroupType = frag.getSubType(); + String suffixTypeToUse = isGroupTypeWithSpecificSuffixRules(groupType) ? 
groupType : STANDARDGROUP_TYPE_VAL; + + List associatedSuffixFrags = state.xmlSuffixMap.get(group); + if (associatedSuffixFrags != null) {//null for non-final group in polycyclic spiro systems + associatedSuffixFrags.clear(); + } + Map> suffixValToSuffixes = new LinkedHashMap>();//effectively undoes the effect of multiplying out suffixes + for (Element suffix : suffixes) { + String suffixValue = suffix.getAttributeValue(VALUE_ATR); + List suffixesWithThisVal = suffixValToSuffixes.get(suffixValue); + if (suffixesWithThisVal == null) { + suffixesWithThisVal = new ArrayList(); + suffixValToSuffixes.put(suffixValue, suffixesWithThisVal); + } + suffixesWithThisVal.add(suffix); + + //Apply isotopes to suffixes if present + if (suffix.getFrag() != null) { + //boughton system applies to preceding suffix + //iupac system applies to following suffix + Element boughtonIsotopeSpecification = OpsinTools.getNextSibling(suffix); + if (boughtonIsotopeSpecification != null && boughtonIsotopeSpecification.getName().equals(ISOTOPESPECIFICATION_EL)) { + if (BOUGHTONSYSTEM_TYPE_VAL.equals(boughtonIsotopeSpecification.getAttributeValue(TYPE_ATR))) { + applyIsotopeToSuffix(suffix.getFrag(), boughtonIsotopeSpecification, false); + } + else { + throw new RuntimeException("Unexpected isotope specification after suffix"); + } + } + Element iupacIsotopeSpecification = OpsinTools.getPreviousSibling(suffix); + while (iupacIsotopeSpecification != null && iupacIsotopeSpecification.getName().equals(ISOTOPESPECIFICATION_EL) && + IUPACSYSTEM_TYPE_VAL.equals(iupacIsotopeSpecification.getAttributeValue(TYPE_ATR))) { + Element next = OpsinTools.getPreviousSibling(iupacIsotopeSpecification); + applyIsotopeToSuffix(suffix.getFrag(), iupacIsotopeSpecification, true); + iupacIsotopeSpecification = next; + } + } + } + + boolean reDetectCycles = false; + List fragsToMerge = new ArrayList(); + for (Entry> entry : suffixValToSuffixes.entrySet()) { + String suffixValue = entry.getKey(); + List 
suffixesWithThisVal = entry.getValue(); + List possibleAtomsToAttachSuffixTo = null; + List rulesToApply = suffixRules.getSuffixRuleTags(suffixTypeToUse, suffixValue, subgroupType); + for (int suffixIndex = 0; suffixIndex < suffixesWithThisVal.size(); suffixIndex++) { + Element suffix = suffixesWithThisVal.get(suffixIndex); + Fragment suffixFrag = null; + for (SuffixRule suffixRule : rulesToApply) { + switch (suffixRule.getType()) { + case addgroup: + if (suffixFrag == null) { + suffixFrag = suffix.getFrag(); + if (suffixFrag == null) { + throw new RuntimeException("OPSIN Bug: Suffix was expected to have an associated fragment but it wasn't found"); + } + Atom firstAtomInSuffix = suffixFrag.getFirstAtom(); + if (firstAtomInSuffix.getBondCount() <= 0) { + throw new ComponentGenerationException("OPSIN Bug: Dummy atom in suffix should have at least one bond to it"); + } + if (CYCLEFORMER_SUBTYPE_VAL.equals(suffix.getAttributeValue(SUBTYPE_ATR))){ + processCycleFormingSuffix(suffixFrag, frag, suffix); + reDetectCycles = true; + } + else{ + int bondOrderRequired = firstAtomInSuffix.getIncomingValency(); + Atom fragAtomToUse = getFragAtomToUse(frag, suffix, suffixTypeToUse); + if (fragAtomToUse == null) { + if (possibleAtomsToAttachSuffixTo == null) { + int substitutionsRequired = suffixesWithThisVal.size(); + possibleAtomsToAttachSuffixTo = FragmentTools.findnAtomsForSubstitution(frag, atomList.get(0), substitutionsRequired, bondOrderRequired, true); + if (possibleAtomsToAttachSuffixTo == null) { + throw new StructureBuildingException("No suitable atom found to attach " + suffixValue + " suffix"); + } + for (Atom atom : possibleAtomsToAttachSuffixTo) { + if (FragmentTools.isCharacteristicAtom(atom)){ + throw new StructureBuildingException("No suitable atom found to attach suffix"); + } + } + if ("yes".equals(suffixRule.getAttributeValue(SUFFIXRULES_KETONELOCANT_ATR)) && !atomList.get(0).getAtomIsInACycle()) { + List proKetoneAtoms = 
getProKetonePositions(possibleAtomsToAttachSuffixTo); + //Note that names like "ethanone" are allowable as the fragment may subsequently be substituted to form an actual ketone + if (proKetoneAtoms.size() >= substitutionsRequired) { + possibleAtomsToAttachSuffixTo = proKetoneAtoms; + } + } + if (!(substitutionsRequired == 1 && (ALKANESTEM_SUBTYPE_VAL.equals(frag.getSubType()) || HETEROSTEM_SUBTYPE_VAL.equals(frag.getSubType())) && possibleAtomsToAttachSuffixTo.get(0).equals(frag.getFirstAtom()))) { + if (AmbiguityChecker.isSubstitutionAmbiguous(possibleAtomsToAttachSuffixTo, substitutionsRequired)) { + state.addIsAmbiguous("Addition of " + suffixValue +" suffix to: " + group.getValue()); + } + } + } + fragAtomToUse = possibleAtomsToAttachSuffixTo.get(suffixIndex); + } + + //create a new bond and associate it with the suffixfrag and both atoms. Remember the suffixFrag has not been imported into the frag yet + List bonds = new ArrayList(firstAtomInSuffix.getBonds()); + for (Bond bondToSuffix : bonds) { + Atom suffixAtom = bondToSuffix.getOtherAtom(firstAtomInSuffix); + state.fragManager.createBond(fragAtomToUse, suffixAtom, bondToSuffix.getOrder()); + state.fragManager.removeBond(bondToSuffix); + if (fragAtomToUse.getIncomingValency() > 2 && (suffixValue.equals("aldehyde") || suffixValue.equals("al")|| suffixValue.equals("aldoxime"))){//formaldehyde/methanal are excluded as they are substitutable + if("X".equals(suffixAtom.getFirstLocant())){//carbaldehyde + suffixAtom.setProperty(Atom.ISALDEHYDE, true); + } + else{ + fragAtomToUse.setProperty(Atom.ISALDEHYDE, true); + } + } + } + } + } + else{ + throw new ComponentGenerationException("OPSIN bug: Suffix may only have one addgroup rule: " + suffix.getValue()); + } + break; + case changecharge: + int chargeChange = Integer.parseInt(suffixRule.getAttributeValue(SUFFIXRULES_CHARGE_ATR)); + int protonChange = Integer.parseInt(suffixRule.getAttributeValue(SUFFIXRULES_PROTONS_ATR)); + if 
(suffix.getAttribute(SUFFIXPREFIX_ATR) == null) { + Atom fragAtomToUse = getFragAtomToUse(frag, suffix, suffixTypeToUse); + if (fragAtomToUse != null) { + fragAtomToUse.addChargeAndProtons(chargeChange, protonChange); + } + else{ + applyUnlocantedChargeModification(atomList, chargeChange, protonChange); + } + } + else {//a suffix prefixed acylium suffix + if (suffixFrag == null) { + throw new StructureBuildingException("OPSIN bug: ordering of elements in suffixRules.xml wrong; changeCharge found before addGroup"); + } + Set bonds = state.fragManager.getInterFragmentBonds(suffixFrag); + if (bonds.size() != 1) { + throw new StructureBuildingException("OPSIN bug: Wrong number of bonds between suffix and group"); + } + for (Bond bond : bonds) { + if (bond.getFromAtom().getFrag() == suffixFrag) { + bond.getFromAtom().addChargeAndProtons(chargeChange, protonChange); + } else { + bond.getToAtom().addChargeAndProtons(chargeChange, protonChange); + } + } + } + break; + case setOutAtom: + String outValencyAtr = suffixRule.getAttributeValue(SUFFIXRULES_OUTVALENCY_ATR); + int outValency = outValencyAtr != null ? 
Integer.parseInt(outValencyAtr) : 1; + if (suffix.getAttribute(SUFFIXPREFIX_ATR) == null) { + Atom fragAtomToUse = getFragAtomToUse(frag, suffix, suffixTypeToUse); + if (fragAtomToUse != null) { + frag.addOutAtom(fragAtomToUse, outValency, true); + } else { + frag.addOutAtom(frag.getFirstAtom(), outValency, false); + } + } else {//something like oyl on a ring, which means it is now carbonyl and the outAtom is on the suffix and not frag + if (suffixFrag == null) { + throw new StructureBuildingException("OPSIN bug: ordering of elements in suffixRules.xml wrong; setOutAtom found before addGroup"); + } + Set bonds = state.fragManager.getInterFragmentBonds(suffixFrag); + if (bonds.size() != 1) { + throw new StructureBuildingException("OPSIN bug: Wrong number of bonds between suffix and group"); + } + for (Bond bond : bonds) { + if (bond.getFromAtom().getFrag() == suffixFrag) { + suffixFrag.addOutAtom(bond.getFromAtom(), outValency, true); + } else { + suffixFrag.addOutAtom(bond.getToAtom(), outValency, true); + } + } + } + break; + case setAcidicElement: + ChemEl chemEl = ChemEl.valueOf(suffixRule.getAttributeValue(SUFFIXRULES_ELEMENT_ATR)); + swapElementsSuchThatThisElementIsAcidic(suffixFrag, chemEl); + break; + case addSuffixPrefixIfNonePresentAndCyclic: + case addFunctionalAtomsToHydroxyGroups: + case chargeHydroxyGroups: + case removeTerminalOxygen: + case convertHydroxyGroupsToOutAtoms: + case convertHydroxyGroupsToPositiveCharge: + //already processed + break; + } + } + + if (suffixFrag != null) {//merge suffix frag and parent fragment + fragsToMerge.add(suffixFrag); + } + } + } + for (Fragment suffixFrag : fragsToMerge) { + state.fragManager.removeAtomAndAssociatedBonds(suffixFrag.getFirstAtom());//the dummy R atom + Set suffixLocants = new HashSet(suffixFrag.getLocants()); + for (String suffixLocant : suffixLocants) { + if (Character.isDigit(suffixLocant.charAt(0))){//check that numeric locants do not conflict with the parent fragment e.g. 
hydrazide 2' with biphenyl 2' + if (frag.hasLocant(suffixLocant)){ + suffixFrag.getAtomByLocant(suffixLocant).removeLocant(suffixLocant); + } + } + } + state.fragManager.incorporateFragment(suffixFrag, frag); + } + if (reDetectCycles) { + CycleDetector.assignWhetherAtomsAreInCycles(frag); + } + + } + + private void applyIsotopeToSuffix(Fragment frag, Element isotopeSpecification, boolean mustBeApplied) throws StructureBuildingException { + IsotopeSpecification isotopeSpec = IsotopeSpecificationParser.parseIsotopeSpecification(isotopeSpecification); + ChemEl chemEl = isotopeSpec.getChemEl(); + int isotope = isotopeSpec.getIsotope(); + int multiplier = isotopeSpec.getMultiplier(); + String[] locants = isotopeSpec.getLocants(); + if (locants != null && !mustBeApplied) { + //locanted boughton isotope probably applies to the group rather than the suffix + return; + } + if (locants == null) { + List atoms = frag.getAtomList(); + atoms.remove(0); + if (chemEl == ChemEl.H) { + List parentAtomsToApplyTo = FragmentTools.findnAtomsForSubstitution(atoms, null, multiplier, 1, true); + if (parentAtomsToApplyTo == null) { + if (mustBeApplied) { + throw new StructureBuildingException("Failed to find sufficient hydrogen atoms for unlocanted hydrogen isotope replacement"); + } + else { + return; + } + } + if (AmbiguityChecker.isSubstitutionAmbiguous(parentAtomsToApplyTo, multiplier)) { + state.addIsAmbiguous("Position of hydrogen isotope on " + frag.getTokenEl().getValue()); + } + for (int j = 0; j < multiplier; j++) { + Atom atomWithHydrogenIsotope = parentAtomsToApplyTo.get(j); + Atom hydrogen = state.fragManager.createAtom(isotopeSpec.getChemEl(), frag); + hydrogen.setIsotope(isotope); + state.fragManager.createBond(atomWithHydrogenIsotope, hydrogen, 1); + } + } + else { + List parentAtomsToApplyTo = new ArrayList(); + for (Atom atom : atoms) { + if (atom.getElement() == chemEl) { + parentAtomsToApplyTo.add(atom); + } + } + if (parentAtomsToApplyTo.size() < multiplier) { + 
if(mustBeApplied) { + throw new StructureBuildingException("Failed to find sufficient atoms for " + chemEl.toString() + " isotope replacement"); + } + else { + return; + } + } + if (AmbiguityChecker.isSubstitutionAmbiguous(parentAtomsToApplyTo, multiplier)) { + state.addIsAmbiguous("Position of isotope on " + frag.getTokenEl().getValue()); + } + for (int j = 0; j < multiplier; j++) { + parentAtomsToApplyTo.get(j).setIsotope(isotope); + } + } + } + else { + if (chemEl == ChemEl.H) { + for (int j = 0; j < locants.length; j++) { + Atom atomWithHydrogenIsotope = frag.getAtomByLocantOrThrow(locants[j]); + Atom hydrogen = state.fragManager.createAtom(isotopeSpec.getChemEl(), frag); + hydrogen.setIsotope(isotope); + state.fragManager.createBond(atomWithHydrogenIsotope, hydrogen, 1); + } + } + else { + for (int j = 0; j < locants.length; j++) { + Atom atom = frag.getAtomByLocantOrThrow(locants[j]); + if (chemEl != atom.getElement()) { + throw new StructureBuildingException("The atom at locant: " + locants[j] + " was not a " + chemEl.toString() ); + } + atom.setIsotope(isotope); + } + } + } + isotopeSpecification.detach(); + } + + + /** + * Return the subset of atoms that are "pro-ketone" + * i.e. 
a [CD2](C)C + * @param atoms + * @return + */ + private List getProKetonePositions(List atoms) { + List proKetonePositions = new ArrayList(); + for (Atom atom : atoms) { + List bonds = atom.getBonds(); + if (bonds.size() == 2 && + bonds.get(0).getOrder() == 1 && + bonds.get(1).getOrder() == 1 && + bonds.get(0).getOtherAtom(atom).getElement() == ChemEl.C && + bonds.get(1).getOtherAtom(atom).getElement() == ChemEl.C) { + proKetonePositions.add(atom); + } + } + return proKetonePositions; + } + + private void processCycleFormingSuffix(Fragment suffixFrag, Fragment suffixableFragment, Element suffix) throws StructureBuildingException, ComponentGenerationException { + List rAtoms = new ArrayList(); + for (Atom a : suffixFrag.getAtomList()) { + if (a.getElement() == ChemEl.R){ + rAtoms.add(a); + } + } + if (rAtoms.size() != 2){ + throw new ComponentGenerationException("OPSIN bug: Incorrect number of R atoms associated with cyclic suffix"); + } + if (rAtoms.get(0).getBondCount() <= 0 || rAtoms.get(1).getBondCount() <= 0) { + throw new ComponentGenerationException("OPSIN Bug: Dummy atoms in suffix should have at least one bond to them"); + } + + Atom parentAtom1; + Atom parentAtom2; + + String locant = suffix.getAttributeValue(LOCANT_ATR); + String locantId = suffix.getAttributeValue(LOCANTID_ATR); + if (locant != null){ + String[] locants = locant.split(","); + if (locants.length ==2){ + parentAtom1 = suffixableFragment.getAtomByLocantOrThrow(locants[0]); + parentAtom2 = suffixableFragment.getAtomByLocantOrThrow(locants[1]); + } + else if (locants.length ==1){ + parentAtom1 = suffixableFragment.getAtomByLocantOrThrow("1"); + parentAtom2 = suffixableFragment.getAtomByLocantOrThrow(locants[0]); + } + else{ + throw new ComponentGenerationException("Incorrect number of locants associated with cycle forming suffix, expected 2 found: " + locants.length); + } + } + else if (locantId !=null) { + String[] locantIds = locantId.split(","); + if (locantIds.length !=2){ + throw new 
ComponentGenerationException("OPSIN bug: Should be exactly 2 locants associated with a cyclic suffix"); + } + parentAtom1 = suffixableFragment.getAtomByIDOrThrow(Integer.parseInt(locantIds[0])); + parentAtom2 = suffixableFragment.getAtomByIDOrThrow(Integer.parseInt(locantIds[1])); + } + else{ + int chainLength = suffixableFragment.getChainLength(); + if (chainLength > 1 && chainLength == suffixableFragment.getAtomCount()){ + parentAtom1 = suffixableFragment.getAtomByLocantOrThrow("1"); + parentAtom2 = suffixableFragment.getAtomByLocantOrThrow(String.valueOf(chainLength)); + } + else{ + List hydroxyAtoms = FragmentTools.findHydroxyGroups(suffixableFragment); + if (hydroxyAtoms.size() == 1 && suffixableFragment.getAtomByLocant("1") != null){ + parentAtom1 = suffixableFragment.getAtomByLocantOrThrow("1"); + parentAtom2 = hydroxyAtoms.get(0); + } + else{ + throw new ComponentGenerationException("cycle forming suffix: " + suffix.getValue() +" should be locanted!"); + } + } + } + if (parentAtom1.equals(parentAtom2)){ + throw new ComponentGenerationException("cycle forming suffix: " + suffix.getValue() +" attempted to form a cycle involving the same atom twice!"); + } + + if (suffixableFragment.getType().equals(CARBOHYDRATE_TYPE_VAL)){ + FragmentTools.removeTerminalOxygen(state, parentAtom1, 2); + FragmentTools.removeTerminalOxygen(state, parentAtom1, 1); + List chainHydroxy = FragmentTools.findHydroxyLikeTerminalAtoms(parentAtom2.getAtomNeighbours(), ChemEl.O); + if (chainHydroxy.size() == 1){ + FragmentTools.removeTerminalAtom(state, chainHydroxy.get(0));//make sure to retain stereochemistry + } + else{ + throw new ComponentGenerationException("The second locant of a carbohydrate lactone should point to a carbon in the chain with a hydroxyl group"); + } + } + else{ + if (parentAtom2.getElement() == ChemEl.O){//cyclic suffixes like lactone formally indicate the removal of hydroxy cf. 
1979 rule 472.1 + //...although in most cases they are used on structures that don't actually have a hydroxy group + List neighbours = parentAtom2.getAtomNeighbours(); + if (neighbours.size()==1){ + List suffixNeighbours = rAtoms.get(1).getAtomNeighbours(); + if (suffixNeighbours.size()==1 && suffixNeighbours.get(0).getElement() == ChemEl.O){ + state.fragManager.removeAtomAndAssociatedBonds(parentAtom2); + parentAtom2 = neighbours.get(0); + } + } + } + } + makeBondsToSuffix(parentAtom1, rAtoms.get(0)); + makeBondsToSuffix(parentAtom2, rAtoms.get(1)); + state.fragManager.removeAtomAndAssociatedBonds(rAtoms.get(1)); + } + + private Atom getFragAtomToUse(Fragment frag, Element suffix, String suffixTypeToUse) throws StructureBuildingException { + String locant = suffix.getAttributeValue(LOCANT_ATR); + if (locant != null) { + return frag.getAtomByLocantOrThrow(locant); + } + String locantId = suffix.getAttributeValue(LOCANTID_ATR); + if (locantId != null) { + return frag.getAtomByIDOrThrow(Integer.parseInt(locantId)); + } + String defaultLocantId = suffix.getAttributeValue(DEFAULTLOCANTID_ATR); + if (defaultLocantId != null) { + return frag.getAtomByIDOrThrow(Integer.parseInt(defaultLocantId)); + } + else if (suffixTypeToUse.equals(ACIDSTEM_TYPE_VAL) || suffixTypeToUse.equals(NONCARBOXYLICACID_TYPE_VAL) || suffixTypeToUse.equals(CHALCOGENACIDSTEM_TYPE_VAL)) {//means that e.g. sulfonyl, has an explicit outAtom + return frag.getFirstAtom(); + } + return null; + } + + /** + * Preference is given to mono cation/anions as they are expected to be more likely + * Additionally, Typically if a locant has not been specified then it was intended to refer to a nitrogen even if the nitrogen is not at locant 1 e.g. 
isoquinolinium + * Hence preference is given to nitrogen atoms and then to non carbon atoms + * @param atomList + * @param chargeChange + * @param protonChange + */ + private void applyUnlocantedChargeModification(List atomList, int chargeChange, int protonChange) { + //List of atoms that can accept this charge while remaining in a reasonable valency + List nitrogens = new ArrayList();//most likely + List otherHeteroatoms = new ArrayList();//plausible + List carbonsAtoms = new ArrayList();//rare + List chargedAtoms = new ArrayList();//very rare + if (atomList.isEmpty()) { + throw new RuntimeException("OPSIN Bug: List of atoms to add charge suffix to was empty"); + } + for (Atom a : atomList) { + ChemEl chemEl = a.getElement(); + Integer[] stableValencies = ValencyChecker.getPossibleValencies(chemEl, a.getCharge() + chargeChange); + if (stableValencies == null) {//unstable valency so seems unlikely + continue; + } + int resultantExpectedValency = (a.getLambdaConventionValency() ==null ? ValencyChecker.getDefaultValency(chemEl) : a.getLambdaConventionValency()) + a.getProtonsExplicitlyAddedOrRemoved() + protonChange; + + if (!Arrays.asList(stableValencies).contains(resultantExpectedValency)) { + //unstable valency so seems unlikely + continue; + } + if (protonChange < 0) { + int substitableHydrogen = StructureBuildingMethods.calculateSubstitutableHydrogenAtoms(a); + if (a.hasSpareValency() && !a.getFrag().getIndicatedHydrogen().contains(a)) { + substitableHydrogen--; + } + if (substitableHydrogen < 1) { + //no hydrogens so operation can't remove one! 
+ continue; + } + } + if (a.getCharge() == 0) { + if (chemEl == ChemEl.N) { + nitrogens.add(a); + } + else if (chemEl != ChemEl.C) { + otherHeteroatoms.add(a); + } + else { + carbonsAtoms.add(a); + } + } + else { + chargedAtoms.add(a); + } + } + List listFromWhichToChoose; + if (!nitrogens.isEmpty()) { + listFromWhichToChoose = nitrogens; + if (AMINOACID_TYPE_VAL.equals(atomList.get(0).getFrag().getType())) { + //By convention treat names like lysinium as unambiguous (prefer alpha nitrogen) + if (listFromWhichToChoose.contains(atomList.get(0))){ + listFromWhichToChoose = new ArrayList(); + listFromWhichToChoose.add(atomList.get(0)); + } + } + } + else if (!otherHeteroatoms.isEmpty()) { + listFromWhichToChoose = otherHeteroatoms; + } + else if (!carbonsAtoms.isEmpty()) { + listFromWhichToChoose = carbonsAtoms; + } + else if (!chargedAtoms.isEmpty()) { + listFromWhichToChoose = chargedAtoms; + } + else { + listFromWhichToChoose = atomList; + } + + Atom chosenAtom = listFromWhichToChoose.get(0); + if (!AmbiguityChecker.allAtomsEquivalent(listFromWhichToChoose)) { + state.addIsAmbiguous("Addition of charge suffix to: " + chosenAtom.getFrag().getTokenEl().getValue()); + } + + chosenAtom.addChargeAndProtons(chargeChange, protonChange); + } + + + /** + * e.g. 
if element is "S" changes C(=S)O -->C(=O)S + * @param frag + * @param chemEl + * @throws StructureBuildingException + */ + private void swapElementsSuchThatThisElementIsAcidic(Fragment frag, ChemEl chemEl) throws StructureBuildingException { + for (int i = 0, l =frag.getFunctionalAtomCount(); i < l; i++) { + Atom atom = frag.getFunctionalAtom(i).getAtom(); + Set ambiguouslyElementedAtoms = atom.getProperty(Atom.AMBIGUOUS_ELEMENT_ASSIGNMENT); + if (ambiguouslyElementedAtoms != null) { + Atom atomToSwapWith = null; + for (Atom ambiguouslyElementedAtom : ambiguouslyElementedAtoms) { + if (ambiguouslyElementedAtom.getElement() == chemEl){ + atomToSwapWith = ambiguouslyElementedAtom; + break; + } + } + if (atomToSwapWith != null) { + if (atomToSwapWith != atom) { + //swap locants and element type + List tempLocants1 = new ArrayList(atom.getLocants()); + List tempLocants2 = new ArrayList(atomToSwapWith.getLocants()); + atom.clearLocants(); + atomToSwapWith.clearLocants(); + for (String locant : tempLocants1) { + atomToSwapWith.addLocant(locant); + } + for (String locant : tempLocants2) { + atom.addLocant(locant); + } + ChemEl a2ChemEl = atomToSwapWith.getElement(); + atomToSwapWith.setElement(atom.getElement()); + atom.setElement(a2ChemEl); + ambiguouslyElementedAtoms.remove(atomToSwapWith); + } + ambiguouslyElementedAtoms.remove(atom); + return; + } + } + } + throw new StructureBuildingException("Unable to find potential acidic atom with element: " + chemEl); + } + + /** + * Creates bonds between the parentAtom and the atoms connected to the R atoms. 
+ * Removes bonds to the R atom + * @param parentAtom + * @param suffixRAtom + */ + private void makeBondsToSuffix(Atom parentAtom, Atom suffixRAtom) { + List bonds = new ArrayList(suffixRAtom.getBonds()); + for (Bond bondToSuffix : bonds) { + Atom suffixAtom = bondToSuffix.getOtherAtom(suffixRAtom); + state.fragManager.createBond(parentAtom, suffixAtom, bondToSuffix.getOrder()); + state.fragManager.removeBond(bondToSuffix); + } + } + + List getSuffixRuleTags(String suffixTypeToUse, String suffixValue, String subgroupType) throws ComponentGenerationException { + return suffixRules.getSuffixRuleTags(suffixTypeToUse, suffixValue, subgroupType); + } +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixRule.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixRule.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixRule.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixRule.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,33 @@ +package uk.ac.cam.ch.wwmm.opsin; + +import java.util.List; + + class SuffixRule { + + private final SuffixRuleType type; + private final List attributes; + + SuffixRule(SuffixRuleType type, List attributes) { + this.type = type; + this.attributes = attributes; + } + + SuffixRuleType getType() { + return type; + } + + /** + * Returns the value of the attribute with the given name + * or null if the attribute doesn't exist + * @param name + * @return + */ + String getAttributeValue(String name) { + for (Attribute a : attributes) { + if (a.getName().equals(name)) { + return a.getValue(); + } + } + return null; + } +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixRules.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixRules.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixRules.java 2013-07-21 14:02:29.000000000 +0000 +++ 
opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixRules.java 2017-07-23 20:55:18.000000000 +0000 @@ -6,103 +6,176 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; +import java.util.Map; + +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; -import nu.xom.Document; -import nu.xom.Element; -import nu.xom.Elements; class SuffixRules { - /**For a given group type what suffixes are applicable. Due to group subTypes altering suffix meaning, the same suffixValue maps to one or more suffixes*/ - private final HashMap>> suffixApplicability; - /**A mapping between suffix rule names and elements containing the rules for applying the corresponding suffix*/ - private final HashMap suffixRules; - - SuffixRules(ResourceGetter resourceGetter) throws IOException{ - suffixApplicability = generateSuffixApplicabilityMap(resourceGetter); - suffixRules = generateSuffixRulesMap(resourceGetter); - } - - private HashMap>> generateSuffixApplicabilityMap(ResourceGetter resourceGetter) throws IOException { - Document suffixApplicabilityDoc = resourceGetter.getXMLDocument("suffixApplicability.xml"); - HashMap>> suffixApplicability = new HashMap>>(); - Elements groupTypes = suffixApplicabilityDoc.getRootElement().getChildElements(SUFFIXAPPLICABILITY_GROUPTYPE_EL); - for (int i = 0; i < groupTypes.size(); i++) { - Element groupType =groupTypes.get(i); - Elements suffixes = groupType.getChildElements(SUFFIXAPPLICABILITY_SUFFIX_EL); - HashMap> suffixToRuleMap= new HashMap>(); - for (int j = 0; j < suffixes.size(); j++) { - Element suffix =suffixes.get(j); - String suffixValue= suffix.getAttributeValue(SUFFIXAPPLICABILITY_VALUE_ATR); - if (suffixToRuleMap.get(suffixValue)!=null){//can have multiple entries if subType attribute is set - suffixToRuleMap.get(suffixValue).add(suffix); + /**For a given group type what suffixes are applicable. 
+ * Within this group type which are applicable for a given suffixValue + * Returns a list as different group subTypes can give different meanings*/ + private final Map>> suffixApplicability; + + private static class ApplicableSuffix { + + private final String requiredSubType; + private final List suffixRules; + + public ApplicableSuffix(String requiredSubType, List suffixRules) { + this.requiredSubType = requiredSubType; + this.suffixRules = suffixRules; + } + } + + SuffixRules(ResourceGetter resourceGetter) throws IOException { + Map> suffixRulesMap = generateSuffixRulesMap(resourceGetter); + suffixApplicability = generateSuffixApplicabilityMap(resourceGetter, suffixRulesMap); + } + + private Map> generateSuffixRulesMap(ResourceGetter resourceGetter) throws IOException { + Map> suffixRulesMap = new HashMap>(); + XMLStreamReader reader = resourceGetter.getXMLStreamReader("suffixRules.xml"); + try { + while (reader.hasNext()) { + if (reader.next() == XMLStreamConstants.START_ELEMENT && + reader.getLocalName().equals(SUFFIXRULES_RULE_EL)) { + String ruleValue = reader.getAttributeValue(null, SUFFIXRULES_VALUE_ATR); + if (suffixRulesMap.get(ruleValue) != null) { + throw new RuntimeException("Suffix: " + ruleValue + " appears multiple times in suffixRules.xml"); + } + suffixRulesMap.put(ruleValue, processSuffixRules(reader)); + } + } + } + catch (XMLStreamException e) { + throw new IOException("Parsing exception occurred while reading suffixRules.xml", e); + } + finally { + try { + reader.close(); + } catch (XMLStreamException e) { + throw new IOException("Parsing exception occurred while reading suffixRules.xml", e); + } + } + return suffixRulesMap; + } + + + private List processSuffixRules(XMLStreamReader reader) throws XMLStreamException { + String startingElName = reader.getLocalName(); + List rules = new ArrayList(); + while (reader.hasNext()) { + switch (reader.next()) { + case XMLStreamConstants.START_ELEMENT: + String tagName = reader.getLocalName(); + 
SuffixRuleType type = SuffixRuleType.valueOf(tagName); + List attributes = new ArrayList(); + for (int i = 0, l = reader.getAttributeCount(); i < l; i++) { + attributes.add(new Attribute(reader.getAttributeLocalName(i), reader.getAttributeValue(i))); } - else{ - List suffixList =new ArrayList(); - suffixList.add(suffix); - suffixToRuleMap.put(suffixValue, suffixList); + rules.add(new SuffixRule(type, attributes)); + break; + case XMLStreamConstants.END_ELEMENT: + if (reader.getLocalName().equals(startingElName)) { + return rules; } + break; } - suffixApplicability.put(groupType.getAttributeValue(SUFFIXAPPLICABILITY_TYPE_ATR), suffixToRuleMap); } - return suffixApplicability; + throw new RuntimeException("Malformed suffixRules.xml"); } - private HashMap generateSuffixRulesMap(ResourceGetter resourceGetter) throws IOException { - Document suffixRulesDoc = resourceGetter.getXMLDocument("suffixRules.xml"); - HashMap suffixRules = new HashMap(); - Elements rules = suffixRulesDoc.getRootElement().getChildElements(SUFFIXRULES_RULE_EL); - for (int i = 0; i < rules.size(); i++) { - Element rule =rules.get(i); - String ruleValue=rule.getAttributeValue(SUFFIXRULES_VALUE_ATR); - if (suffixRules.get(ruleValue)!=null){ - throw new RuntimeException("Suffix: " +ruleValue +" appears multiple times in suffixRules.xml"); + private Map>> generateSuffixApplicabilityMap(ResourceGetter resourceGetter, Map> suffixRulesMap) throws IOException { + Map>> suffixApplicability = new HashMap>>(); + XMLStreamReader reader = resourceGetter.getXMLStreamReader("suffixApplicability.xml"); + try { + while (reader.hasNext()) { + if (reader.next() == XMLStreamConstants.START_ELEMENT && + reader.getLocalName().equals(SUFFIXAPPLICABILITY_GROUPTYPE_EL)) { + Map> suffixToRuleMap = new HashMap>(); + suffixApplicability.put(reader.getAttributeValue(null, SUFFIXAPPLICABILITY_TYPE_ATR), suffixToRuleMap); + while (reader.hasNext()) { + int event = reader.next(); + if (event == XMLStreamConstants.START_ELEMENT && 
+ reader.getLocalName().equals(SUFFIXAPPLICABILITY_SUFFIX_EL)) { + String suffixValue = reader.getAttributeValue(null, SUFFIXAPPLICABILITY_VALUE_ATR); + List suffixList = suffixToRuleMap.get(suffixValue); + //can have multiple entries if subType attribute is set + if (suffixToRuleMap.get(suffixValue) == null){ + suffixList = new ArrayList(); + suffixToRuleMap.put(suffixValue, suffixList); + } + String requiredSubType = reader.getAttributeValue(null, SUFFIXAPPLICABILITY_SUBTYPE_ATR); + String suffixRuleName = reader.getElementText(); + List suffixRules = suffixRulesMap.get(suffixRuleName); + if (suffixRules == null) { + throw new RuntimeException("Suffix: " + suffixRuleName +" does not have a rule associated with it in suffixRules.xml"); + } + suffixList.add(new ApplicableSuffix(requiredSubType, suffixRules)); + } + else if (event == XMLStreamConstants.END_ELEMENT && + reader.getLocalName().equals(SUFFIXAPPLICABILITY_GROUPTYPE_EL)) { + break; + } + } + } } - suffixRules.put(ruleValue, rule); } - return suffixRules; + catch (XMLStreamException e) { + throw new IOException("Parsing exception occurred while reading suffixApplicability.xml", e); + } + finally { + try { + reader.close(); + } catch (XMLStreamException e) { + throw new IOException("Parsing exception occurred while reading suffixApplicability.xml", e); + } + } + return suffixApplicability; } + /** - * Returns the appropriate suffixRule tags for the given arguments. - * The suffix rule tags are the children of the appropriate rule in suffixRules.xml + * Returns the appropriate suffixRules for the given arguments. 
+ * The suffix rules are the children of the appropriate rule in suffixRules.xml * @param suffixTypeToUse * @param suffixValue * @param subgroupType * @return * @throws ComponentGenerationException */ - Elements getSuffixRuleTags(String suffixTypeToUse, String suffixValue, String subgroupType) throws ComponentGenerationException { - HashMap> groupToSuffixMap = suffixApplicability.get(suffixTypeToUse); - if (groupToSuffixMap==null){ - throw new ComponentGenerationException("Suffix Type: "+ suffixTypeToUse + " does not have a corresponding groupType entry in suffixApplicability.xml"); - } - List potentiallyApplicableSuffixes =groupToSuffixMap.get(suffixValue); - if(potentiallyApplicableSuffixes==null || potentiallyApplicableSuffixes.size()==0 ) { - throw new ComponentGenerationException("Suffix: " +suffixValue +" does not apply to the group it was associated with (type: "+ suffixTypeToUse + ") according to suffixApplicability.xml"); - } - Element chosenSuffix=null; - for (Element suffix : potentiallyApplicableSuffixes) { - if (suffix.getAttribute(SUFFIXAPPLICABILITY_SUBTYPE_ATR) != null) { - if (!suffix.getAttributeValue(SUFFIXAPPLICABILITY_SUBTYPE_ATR).equals(subgroupType)) { - continue; - } - } - if (chosenSuffix != null) { - throw new ComponentGenerationException("Suffix: " + suffixValue + " appears multiple times in suffixApplicability.xml"); - } - chosenSuffix = suffix; - } - if (chosenSuffix==null){ - throw new ComponentGenerationException("Suffix: " +suffixValue +" does not apply to the group it was associated with (type: "+ suffixTypeToUse + ") due to the group's subType: "+ subgroupType +" according to suffixApplicability.xml"); - } - Element rule =suffixRules.get(chosenSuffix.getValue()); - if(rule ==null) { - throw new ComponentGenerationException("Suffix: " +chosenSuffix.getValue() +" does not have a rule associated with it in suffixRules.xml"); + List getSuffixRuleTags(String suffixTypeToUse, String suffixValue, String subgroupType) throws 
ComponentGenerationException { + Map> groupToSuffixMap = suffixApplicability.get(suffixTypeToUse); + if (groupToSuffixMap == null){ + throw new ComponentGenerationException("Suffix Type: " + suffixTypeToUse + " does not have a corresponding groupType entry in suffixApplicability.xml"); + } + List potentiallyApplicableSuffixes = groupToSuffixMap.get(suffixValue); + if(potentiallyApplicableSuffixes == null || potentiallyApplicableSuffixes.size() == 0 ) { + throw new ComponentGenerationException("Suffix: " + suffixValue + " does not apply to the group it was associated with (type: " + suffixTypeToUse + ") according to suffixApplicability.xml"); + } + List suffixRules = null; + for (ApplicableSuffix suffix : potentiallyApplicableSuffixes) { + if (suffix.requiredSubType != null) { + if (!suffix.requiredSubType.equals(subgroupType)) { + continue; + } + } + if (suffixRules != null) { + throw new ComponentGenerationException("Suffix: " + suffixValue + " appears multiple times in suffixApplicability.xml"); + } + suffixRules = suffix.suffixRules; + } + if (suffixRules == null){ + throw new ComponentGenerationException("Suffix: " +suffixValue +" does not apply to the group it was associated with (type: "+ suffixTypeToUse + ") due to the group's subType: "+ subgroupType +" according to suffixApplicability.xml"); } - return rule.getChildElements(); + return suffixRules; } + /** * Does suffixApplicability.xml have an entry for this group type? 
* @param groupType @@ -111,4 +184,6 @@ boolean isGroupTypeWithSpecificSuffixRules(String groupType){ return suffixApplicability.containsKey(groupType); } + + } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixRuleType.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixRuleType.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixRuleType.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/SuffixRuleType.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,14 @@ +package uk.ac.cam.ch.wwmm.opsin; + +enum SuffixRuleType { + addgroup, + addSuffixPrefixIfNonePresentAndCyclic, + setOutAtom, + changecharge, + addFunctionalAtomsToHydroxyGroups, + chargeHydroxyGroups, + removeTerminalOxygen, + convertHydroxyGroupsToOutAtoms, + convertHydroxyGroupsToPositiveCharge, + setAcidicElement +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/TokenEl.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/TokenEl.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/TokenEl.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/TokenEl.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,119 @@ +package uk.ac.cam.ch.wwmm.opsin; + +import java.util.Collections; +import java.util.List; + +class TokenEl extends Element { + + private String value; + private Fragment frag; + + TokenEl(String name) { + super(name); + this.value = ""; + } + + TokenEl(String name, String value) { + super(name); + this.value = value; + } + + @Override + void addChild(Element child) { + throw new UnsupportedOperationException("Tokens do not have children"); + } + + @Override + Element copy() { + TokenEl copy = new TokenEl(this.name, this.value); + for (int i = 0, len = this.attributes.size(); i < len; i++) { + Attribute atr = this.attributes.get(i); + copy.addAttribute(new Attribute(atr)); 
+ } + return copy; + } + + /** + * Creates a copy with no parent + * The provided value is used instead of the Element to be copied's value + * @param value + * @return + */ + TokenEl copy(String value) { + TokenEl copy = new TokenEl(this.name, value); + for (int i = 0, len = this.attributes.size(); i < len; i++) { + Attribute atr = this.attributes.get(i); + copy.addAttribute(new Attribute(atr)); + } + return copy; + } + + @Override + Element getChild(int index) { + throw new UnsupportedOperationException("Tokens do not have children"); + } + + @Override + int getChildCount() { + return 0; + } + + @Override + List getChildElements() { + return Collections.emptyList(); + } + + @Override + List getChildElements(String name) { + return Collections.emptyList(); + } + + @Override + Element getFirstChildElement(String name) { + return null; + } + + @Override + Fragment getFrag() { + return frag; + } + + String getValue() { + return value; + } + + @Override + int indexOf(Element child) { + return -1; + } + + @Override + void insertChild(Element child, int index) { + throw new UnsupportedOperationException("Tokens do not have children"); + } + + @Override + boolean removeChild(Element child) { + throw new UnsupportedOperationException("Tokens do not have children"); + } + + @Override + Element removeChild(int index) { + throw new UnsupportedOperationException("Tokens do not have children"); + } + + @Override + void replaceChild(Element oldChild, Element newChild) { + throw new UnsupportedOperationException("Tokens do not have children"); + } + + @Override + void setFrag(Fragment frag) { + this.frag = frag; + } + + void setValue(String text) { + this.value = text; + } + +} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Tokeniser.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Tokeniser.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Tokeniser.java 2013-07-21 14:02:29.000000000 +0000 +++ 
opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Tokeniser.java 2017-07-23 20:55:18.000000000 +0000 @@ -110,7 +110,7 @@ return parseTokens.size()>0 && (result.isFullyInterpretable() || result.getWorkingName().charAt(0) ==' ' || result.getWorkingName().charAt(0) =='-'); } - private void parseWord(TokenizationResult result, List parseTokens, String parsedName, boolean reverse) throws ParsingException { + private void parseWord(TokenizationResult result, List parseTokens, String parsedName, boolean reverse) { //If something like ethylchloride is encountered this should be split back to ethyl chloride and there will be 2 ParseWords returned //In cases of properly formed names there will be only one ParseWord //If there are two parses one of which assumes a missing space and one of which does not the former is discarded @@ -140,7 +140,7 @@ } } - private void addParseWords(List parseTokens, String parsedName, Parse parse, boolean reverse) throws ParsingException { + private void addParseWords(List parseTokens, String parsedName, Parse parse, boolean reverse) { List parseWords = WordTools.splitIntoParseWords(parseTokens, parsedName); if (reverse) { diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/TokenizationResult.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/TokenizationResult.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/TokenizationResult.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/TokenizationResult.java 2017-07-23 20:55:18.000000000 +0000 @@ -7,7 +7,7 @@ */ class TokenizationResult { - private Parse parse; + private final Parse parse; private String workingName; private String unparsableName; private String unparsedName; diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Token.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Token.java --- 
opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Token.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/Token.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,77 +0,0 @@ -package uk.ac.cam.ch.wwmm.opsin; - -import nu.xom.Attribute; -import nu.xom.Element; -import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; - -/**A token in a chemical name. hex, yl, ane, chloro etc. - * Stores information about the XML element that will be produced for the token. - * - * @author ptc24 - * @author dl387 - * - */ -class Token { - - /**A reference copy of the XML element to produce*/ - private final Element elem; - - /**Should this token actually be used. Set to true for meaningless tokens e.g. e, o, endOfSubstituent etc.*/ - private boolean ignoreWhenWritingXML =false; - - - /** - * Makes a new token using a regexToken Element - * @param regexTokenElement - */ - Token(Element regexTokenElement) { - elem = new Element(regexTokenElement.getAttributeValue("tagname")); - if (regexTokenElement.getAttribute("value")!=null){ - elem.addAttribute(new Attribute(VALUE_ATR, regexTokenElement.getAttributeValue("value"))); - } - if (regexTokenElement.getAttribute("type")!=null){ - elem.addAttribute(new Attribute(TYPE_ATR, regexTokenElement.getAttributeValue("type"))); - } - if (regexTokenElement.getAttribute("subType")!=null){ - elem.addAttribute(new Attribute(SUBTYPE_ATR, regexTokenElement.getAttributeValue("subType"))); - } - if ("yes".equals(regexTokenElement.getAttributeValue("ignoreWhenWritingXML"))){ - ignoreWhenWritingXML=true; - } - } - - /**Makes a new Token based on reference elements from an XML file. - * - * @param tokenElement The token element in the XML tokens file. - * @param tokenList The tokenList element the token was taken from. 
- */ - Token(Element tokenElement, Element tokenList) { - elem = OpsinTools.shallowCopy(tokenElement); - elem.setLocalName(tokenList.getAttributeValue("tagname")); - if(tokenList.getAttribute("type") != null) { - elem.addAttribute(new Attribute(TYPE_ATR, tokenList.getAttributeValue("type"))); - } - if(tokenList.getAttribute("subType") != null) { - elem.addAttribute(new Attribute(SUBTYPE_ATR, tokenList.getAttributeValue("subType"))); - } - if ("yes".equals(tokenList.getAttributeValue("ignoreWhenWritingXML"))){ - ignoreWhenWritingXML=true; - } - } - - /**Makes an XML element of the token. - * - * @param text The string to go in the Text node contained within the Element. - * @return The element produced. - */ - Element makeElement(String text) { - if (!ignoreWhenWritingXML){ - Element tokenElement = OpsinTools.shallowCopy(elem); - tokenElement.appendChild(text); - return tokenElement; - } - else{ - return null; - } - } -} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ValencyChecker.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ValencyChecker.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ValencyChecker.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/ValencyChecker.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,5 +1,6 @@ package uk.ac.cam.ch.wwmm.opsin; +import java.util.EnumMap; import java.util.HashMap; import java.util.Map; @@ -16,288 +17,311 @@ */ class ValencyChecker { - private static final Map expectedDefaultValency;//used to decide on the likely valency state - private static final Map valencyInHW;//used to decide whether an atom has spare valency in a ring, these are the same as specified in the Hantzch-Widman system - private static final Map> possibleStableValencies;//used to decide on the likely valency state + /** used to decide on the likely valency state*/ + private static final Map expectedDefaultValency = new 
EnumMap(ChemEl.class); + + /** used to decide whether an atom has spare valency in a ring, these are the same as specified in the Hantzch-Widman system */ + private static final Map valencyInHW = new EnumMap(ChemEl.class); + + /** used to decide on the likely valency state */ + private static final Map> possibleStableValencies = new EnumMap>(ChemEl.class); static { - expectedDefaultValency = new HashMap(); - expectedDefaultValency.put("B", 3); - expectedDefaultValency.put("Al", 3); - expectedDefaultValency.put("In", 3); - expectedDefaultValency.put("Ga", 3); - expectedDefaultValency.put("Tl", 3); - expectedDefaultValency.put("C", 4); - expectedDefaultValency.put("Si", 4); - expectedDefaultValency.put("Ge", 4); - expectedDefaultValency.put("Sn", 4); - expectedDefaultValency.put("Pb", 4); - expectedDefaultValency.put("N", 3); - expectedDefaultValency.put("P", 3); - expectedDefaultValency.put("As", 3); - expectedDefaultValency.put("Sb", 3); - expectedDefaultValency.put("Bi", 3); - expectedDefaultValency.put("O", 2); - expectedDefaultValency.put("S", 2); - expectedDefaultValency.put("Se", 2); - expectedDefaultValency.put("Te", 2); - expectedDefaultValency.put("Po", 2); - expectedDefaultValency.put("F", 1); - expectedDefaultValency.put("Cl", 1); - expectedDefaultValency.put("Br", 1); - expectedDefaultValency.put("I", 1); - expectedDefaultValency.put("At", 1); + expectedDefaultValency.put(ChemEl.B, 3); + expectedDefaultValency.put(ChemEl.Al, 3); + expectedDefaultValency.put(ChemEl.In, 3); + expectedDefaultValency.put(ChemEl.Ga, 3); + expectedDefaultValency.put(ChemEl.Tl, 3); + expectedDefaultValency.put(ChemEl.C, 4); + expectedDefaultValency.put(ChemEl.Si, 4); + expectedDefaultValency.put(ChemEl.Ge, 4); + expectedDefaultValency.put(ChemEl.Sn, 4); + expectedDefaultValency.put(ChemEl.Pb, 4); + expectedDefaultValency.put(ChemEl.N, 3); + expectedDefaultValency.put(ChemEl.P, 3); + expectedDefaultValency.put(ChemEl.As, 3); + expectedDefaultValency.put(ChemEl.Sb, 3); + 
expectedDefaultValency.put(ChemEl.Bi, 3); + expectedDefaultValency.put(ChemEl.O, 2); + expectedDefaultValency.put(ChemEl.S, 2); + expectedDefaultValency.put(ChemEl.Se, 2); + expectedDefaultValency.put(ChemEl.Te, 2); + expectedDefaultValency.put(ChemEl.Po, 2); + expectedDefaultValency.put(ChemEl.F, 1); + expectedDefaultValency.put(ChemEl.Cl, 1); + expectedDefaultValency.put(ChemEl.Br, 1); + expectedDefaultValency.put(ChemEl.I, 1); + expectedDefaultValency.put(ChemEl.At, 1); //in order of priority in the HW system - valencyInHW = new HashMap(); - valencyInHW.put("F", 1); - valencyInHW.put("Cl", 1); - valencyInHW.put("Br", 1); - valencyInHW.put("I", 1); - valencyInHW.put("O", 2); - valencyInHW.put("S", 2); - valencyInHW.put("Se", 2); - valencyInHW.put("Te", 2); - valencyInHW.put("N", 3); - valencyInHW.put("P", 3); - valencyInHW.put("As", 3); - valencyInHW.put("Sb", 3); - valencyInHW.put("Bi", 3); - valencyInHW.put("Si", 4); - valencyInHW.put("Ge", 4); - valencyInHW.put("Sn", 4); - valencyInHW.put("Pb", 4); - valencyInHW.put("B", 3); - valencyInHW.put("Al", 3); - valencyInHW.put("Ga", 3); - valencyInHW.put("In", 3); - valencyInHW.put("Tl", 3); - valencyInHW.put("Hg", 2); - - possibleStableValencies = new HashMap>(); - possibleStableValencies.put("H", new HashMap()); - possibleStableValencies.put("He", new HashMap()); - possibleStableValencies.put("Li", new HashMap()); - possibleStableValencies.put("Be", new HashMap()); - possibleStableValencies.put("B", new HashMap()); - possibleStableValencies.put("C", new HashMap()); - possibleStableValencies.put("N", new HashMap()); - possibleStableValencies.put("O", new HashMap()); - possibleStableValencies.put("F", new HashMap()); - possibleStableValencies.put("Ne", new HashMap()); - possibleStableValencies.put("Na", new HashMap()); - possibleStableValencies.put("Mg", new HashMap()); - possibleStableValencies.put("Al", new HashMap()); - possibleStableValencies.put("Si", new HashMap()); - possibleStableValencies.put("P", new 
HashMap()); - possibleStableValencies.put("S", new HashMap()); - possibleStableValencies.put("Cl", new HashMap()); - possibleStableValencies.put("Ar", new HashMap()); - possibleStableValencies.put("K", new HashMap()); - possibleStableValencies.put("Ca", new HashMap()); - possibleStableValencies.put("Ga", new HashMap()); - possibleStableValencies.put("Ge", new HashMap()); - possibleStableValencies.put("As", new HashMap()); - possibleStableValencies.put("Se", new HashMap()); - possibleStableValencies.put("Br", new HashMap()); - possibleStableValencies.put("Kr", new HashMap()); - possibleStableValencies.put("Rb", new HashMap()); - possibleStableValencies.put("Sr", new HashMap()); - possibleStableValencies.put("In", new HashMap()); - possibleStableValencies.put("Sn", new HashMap()); - possibleStableValencies.put("Sb", new HashMap()); - possibleStableValencies.put("Te", new HashMap()); - possibleStableValencies.put("I", new HashMap()); - possibleStableValencies.put("Xe", new HashMap()); - possibleStableValencies.put("Cs", new HashMap()); - possibleStableValencies.put("Ba", new HashMap()); - possibleStableValencies.put("Tl", new HashMap()); - possibleStableValencies.put("Pb", new HashMap()); - possibleStableValencies.put("Bi", new HashMap()); - possibleStableValencies.put("Po", new HashMap()); - possibleStableValencies.put("At", new HashMap()); - possibleStableValencies.put("Rn", new HashMap()); - possibleStableValencies.put("Fr", new HashMap()); - possibleStableValencies.put("Ra", new HashMap()); - - possibleStableValencies.get("H").put(0, new Integer[]{1}); - possibleStableValencies.get("He").put(0, new Integer[]{0}); - possibleStableValencies.get("Li").put(0, new Integer[]{1}); - possibleStableValencies.get("Be").put(0, new Integer[]{2}); - possibleStableValencies.get("B").put(0, new Integer[]{3}); - possibleStableValencies.get("C").put(0, new Integer[]{4}); - possibleStableValencies.get("N").put(0, new Integer[]{3}); - possibleStableValencies.get("O").put(0, new 
Integer[]{2}); - possibleStableValencies.get("F").put(0, new Integer[]{1}); - possibleStableValencies.get("Ne").put(0, new Integer[]{0}); - possibleStableValencies.get("Na").put(0, new Integer[]{1}); - possibleStableValencies.get("Mg").put(0, new Integer[]{2}); - possibleStableValencies.get("Al").put(0, new Integer[]{3}); - possibleStableValencies.get("Si").put(0, new Integer[]{4}); - possibleStableValencies.get("P").put(0, new Integer[]{3,5}); - possibleStableValencies.get("S").put(0, new Integer[]{2,4,6}); - possibleStableValencies.get("Cl").put(0, new Integer[]{1,3,5,7}); - possibleStableValencies.get("Ar").put(0, new Integer[]{0}); - possibleStableValencies.get("K").put(0, new Integer[]{1}); - possibleStableValencies.get("Ca").put(0, new Integer[]{2}); - possibleStableValencies.get("Ga").put(0, new Integer[]{3}); - possibleStableValencies.get("Ge").put(0, new Integer[]{4}); - possibleStableValencies.get("As").put(0, new Integer[]{3,5}); - possibleStableValencies.get("Se").put(0, new Integer[]{2,4,6}); - possibleStableValencies.get("Br").put(0, new Integer[]{1,3,5,7}); - possibleStableValencies.get("Kr").put(0, new Integer[]{0,2}); - possibleStableValencies.get("Rb").put(0, new Integer[]{1}); - possibleStableValencies.get("Sr").put(0, new Integer[]{2}); - possibleStableValencies.get("In").put(0, new Integer[]{3}); - possibleStableValencies.get("Sn").put(0, new Integer[]{2,4}); - possibleStableValencies.get("Sb").put(0, new Integer[]{3,5}); - possibleStableValencies.get("Te").put(0, new Integer[]{2,4,6}); - possibleStableValencies.get("I").put(0, new Integer[]{1,3,5,7}); - possibleStableValencies.get("Xe").put(0, new Integer[]{0,2,4,6,8}); - possibleStableValencies.get("Cs").put(0, new Integer[]{1}); - possibleStableValencies.get("Ba").put(0, new Integer[]{2}); - possibleStableValencies.get("Tl").put(0, new Integer[]{1,3}); - possibleStableValencies.get("Pb").put(0, new Integer[]{2,4}); - possibleStableValencies.get("Bi").put(0, new Integer[]{3,5}); - 
possibleStableValencies.get("Po").put(0, new Integer[]{2,4,6}); - possibleStableValencies.get("At").put(0, new Integer[]{1,3,5,7}); - possibleStableValencies.get("Rn").put(0, new Integer[]{0,2,4,6,8}); - possibleStableValencies.get("Fr").put(0, new Integer[]{1}); - possibleStableValencies.get("Ra").put(0, new Integer[]{2}); - - possibleStableValencies.get("H").put(1, new Integer[]{0}); - possibleStableValencies.get("Li").put(1, new Integer[]{0}); - possibleStableValencies.get("Be").put(1, new Integer[]{1}); - possibleStableValencies.get("Be").put(2, new Integer[]{0}); - possibleStableValencies.get("B").put(2, new Integer[]{1}); - possibleStableValencies.get("B").put(1, new Integer[]{2}); - possibleStableValencies.get("B").put(-1, new Integer[]{4}); - possibleStableValencies.get("B").put(-2, new Integer[]{3}); - possibleStableValencies.get("C").put(2, new Integer[]{2}); - possibleStableValencies.get("C").put(1, new Integer[]{3}); - possibleStableValencies.get("C").put(-1, new Integer[]{3}); - possibleStableValencies.get("C").put(-2, new Integer[]{2}); - possibleStableValencies.get("N").put(2, new Integer[]{3}); - possibleStableValencies.get("N").put(1, new Integer[]{4}); - possibleStableValencies.get("N").put(-1, new Integer[]{2}); - possibleStableValencies.get("N").put(-2, new Integer[]{1}); - possibleStableValencies.get("O").put(1, new Integer[]{4}); - possibleStableValencies.get("O").put(1, new Integer[]{3,5}); - possibleStableValencies.get("O").put(-1, new Integer[]{1}); - possibleStableValencies.get("O").put(-2, new Integer[]{0}); - possibleStableValencies.get("F").put(2, new Integer[]{3,5}); - possibleStableValencies.get("F").put(1, new Integer[]{2}); - possibleStableValencies.get("F").put(-1, new Integer[]{0}); - possibleStableValencies.get("Na").put(1, new Integer[]{0}); - possibleStableValencies.get("Na").put(-1, new Integer[]{0}); - possibleStableValencies.get("Mg").put(2, new Integer[]{0}); - possibleStableValencies.get("Al").put(3, new Integer[]{0}); - 
possibleStableValencies.get("Al").put(2, new Integer[]{1}); - possibleStableValencies.get("Al").put(1, new Integer[]{2}); - possibleStableValencies.get("Al").put(-1, new Integer[]{4}); - possibleStableValencies.get("Al").put(-2, new Integer[]{3,5}); - possibleStableValencies.get("Si").put(2, new Integer[]{2}); - possibleStableValencies.get("Si").put(1, new Integer[]{3}); - possibleStableValencies.get("Si").put(-1, new Integer[]{3,5}); - possibleStableValencies.get("Si").put(-2, new Integer[]{2}); - possibleStableValencies.get("P").put(2, new Integer[]{3}); - possibleStableValencies.get("P").put(1, new Integer[]{4}); - possibleStableValencies.get("P").put(-1, new Integer[]{2,4,6}); - possibleStableValencies.get("P").put(-2, new Integer[]{1,3,5,7}); - possibleStableValencies.get("S").put(2, new Integer[]{4}); - possibleStableValencies.get("S").put(1, new Integer[]{3,5}); - possibleStableValencies.get("S").put(-1, new Integer[]{1,3,5,7}); - possibleStableValencies.get("S").put(-2, new Integer[]{0}); - possibleStableValencies.get("Cl").put(2, new Integer[]{3,5}); - possibleStableValencies.get("Cl").put(1, new Integer[]{2,4,6}); - possibleStableValencies.get("Cl").put(-1, new Integer[]{0}); - possibleStableValencies.get("K").put(1, new Integer[]{0}); - possibleStableValencies.get("K").put(-1, new Integer[]{0}); - possibleStableValencies.get("Ca").put(2, new Integer[]{0}); - possibleStableValencies.get("Ca").put(1, new Integer[]{1}); - possibleStableValencies.get("Ga").put(3, new Integer[]{0}); - possibleStableValencies.get("Ga").put(2, new Integer[]{1}); - possibleStableValencies.get("Ga").put(1, new Integer[]{0}); - possibleStableValencies.get("Ga").put(-1, new Integer[]{4}); - possibleStableValencies.get("Ga").put(-2, new Integer[]{3,5}); - possibleStableValencies.get("Ge").put(4, new Integer[]{0}); - possibleStableValencies.get("Ge").put(1, new Integer[]{3}); - possibleStableValencies.get("Ge").put(-1, new Integer[]{3,5}); - possibleStableValencies.get("Ge").put(-2, 
new Integer[]{2,4,6}); - possibleStableValencies.get("As").put(2, new Integer[]{3}); - possibleStableValencies.get("As").put(1, new Integer[]{4}); - possibleStableValencies.get("As").put(-1, new Integer[]{2,4,6}); - possibleStableValencies.get("As").put(-2, new Integer[]{1,3,5,7}); - possibleStableValencies.get("As").put(-3, new Integer[]{0}); - possibleStableValencies.get("Se").put(2, new Integer[]{4}); - possibleStableValencies.get("Se").put(1, new Integer[]{3,5}); - possibleStableValencies.get("Se").put(-1, new Integer[]{1,3,5,7}); - possibleStableValencies.get("Se").put(-2, new Integer[]{0}); - possibleStableValencies.get("Br").put(2, new Integer[]{3,5}); - possibleStableValencies.get("Br").put(1, new Integer[]{2,4,6}); - possibleStableValencies.get("Br").put(-1, new Integer[]{0}); - possibleStableValencies.get("Rb").put(1, new Integer[]{0}); - possibleStableValencies.get("Rb").put(-1, new Integer[]{0}); - possibleStableValencies.get("Sr").put(2, new Integer[]{0}); - possibleStableValencies.get("Sr").put(1, new Integer[]{1}); - possibleStableValencies.get("In").put(3, new Integer[]{0}); - possibleStableValencies.get("In").put(2, new Integer[]{1}); - possibleStableValencies.get("In").put(1, new Integer[]{0}); - possibleStableValencies.get("In").put(-1, new Integer[]{2,4}); - possibleStableValencies.get("In").put(-2, new Integer[]{3,5}); - possibleStableValencies.get("Sn").put(4, new Integer[]{0}); - possibleStableValencies.get("Sn").put(2, new Integer[]{0}); - possibleStableValencies.get("Sn").put(1, new Integer[]{3}); - possibleStableValencies.get("Sn").put(-1, new Integer[]{3,5}); - possibleStableValencies.get("Sn").put(-2, new Integer[]{2,4,6}); - possibleStableValencies.get("Sb").put(3, new Integer[]{0}); - possibleStableValencies.get("Sb").put(2, new Integer[]{3}); - possibleStableValencies.get("Sb").put(1, new Integer[]{2,4}); - possibleStableValencies.get("Sb").put(-1, new Integer[]{2,4,6}); - possibleStableValencies.get("Sb").put(-2, new 
Integer[]{1,3,5,7}); - possibleStableValencies.get("Te").put(2, new Integer[]{2,4}); - possibleStableValencies.get("Te").put(1, new Integer[]{3,5}); - possibleStableValencies.get("Te").put(-1, new Integer[]{1,3,5,7}); - possibleStableValencies.get("Te").put(-2, new Integer[]{0}); - possibleStableValencies.get("I").put(2, new Integer[]{3,5}); - possibleStableValencies.get("I").put(1, new Integer[]{2,4,6}); - possibleStableValencies.get("I").put(-1, new Integer[]{0}); - possibleStableValencies.get("Cs").put(1, new Integer[]{0}); - possibleStableValencies.get("Cs").put(-1, new Integer[]{0}); - possibleStableValencies.get("Ba").put(2, new Integer[]{0}); - possibleStableValencies.get("Ba").put(1, new Integer[]{1}); - possibleStableValencies.get("Pb").put(2, new Integer[]{0}); - possibleStableValencies.get("Pb").put(1, new Integer[]{3}); - possibleStableValencies.get("Pb").put(-1, new Integer[]{3,5}); - possibleStableValencies.get("Pb").put(-2, new Integer[]{2,4,6}); - possibleStableValencies.get("Bi").put(3, new Integer[]{0}); - possibleStableValencies.get("Bi").put(2, new Integer[]{3}); - possibleStableValencies.get("Bi").put(1, new Integer[]{2,4}); - possibleStableValencies.get("Bi").put(-1, new Integer[]{2,4,6}); - possibleStableValencies.get("Bi").put(-2, new Integer[]{1,3,5,7}); - possibleStableValencies.get("At").put(2, new Integer[]{3,5}); - possibleStableValencies.get("At").put(1, new Integer[]{2,4,6}); - possibleStableValencies.get("At").put(-1, new Integer[]{0}); - possibleStableValencies.get("Fr").put(1, new Integer[]{0}); - possibleStableValencies.get("Ra").put(2, new Integer[]{0}); - possibleStableValencies.get("Ra").put(1, new Integer[]{1}); + valencyInHW.put(ChemEl.F, 1); + valencyInHW.put(ChemEl.Cl, 1); + valencyInHW.put(ChemEl.Br, 1); + valencyInHW.put(ChemEl.I, 1); + valencyInHW.put(ChemEl.O, 2); + valencyInHW.put(ChemEl.S, 2); + valencyInHW.put(ChemEl.Se, 2); + valencyInHW.put(ChemEl.Te, 2); + valencyInHW.put(ChemEl.N, 3); + valencyInHW.put(ChemEl.P, 
3); + valencyInHW.put(ChemEl.As, 3); + valencyInHW.put(ChemEl.Sb, 3); + valencyInHW.put(ChemEl.Bi, 3); + valencyInHW.put(ChemEl.Si, 4); + valencyInHW.put(ChemEl.Ge, 4); + valencyInHW.put(ChemEl.Sn, 4); + valencyInHW.put(ChemEl.Pb, 4); + valencyInHW.put(ChemEl.B, 3); + valencyInHW.put(ChemEl.Al, 3); + valencyInHW.put(ChemEl.Ga, 3); + valencyInHW.put(ChemEl.In, 3); + valencyInHW.put(ChemEl.Tl, 3); + valencyInHW.put(ChemEl.Hg, 2); + + valencyInHW.put(ChemEl.C, 4); + + possibleStableValencies.put(ChemEl.H, new HashMap()); + possibleStableValencies.put(ChemEl.He, new HashMap()); + possibleStableValencies.put(ChemEl.Li, new HashMap()); + possibleStableValencies.put(ChemEl.Be, new HashMap()); + possibleStableValencies.put(ChemEl.B, new HashMap()); + possibleStableValencies.put(ChemEl.C, new HashMap()); + possibleStableValencies.put(ChemEl.N, new HashMap()); + possibleStableValencies.put(ChemEl.O, new HashMap()); + possibleStableValencies.put(ChemEl.F, new HashMap()); + possibleStableValencies.put(ChemEl.Ne, new HashMap()); + possibleStableValencies.put(ChemEl.Na, new HashMap()); + possibleStableValencies.put(ChemEl.Mg, new HashMap()); + possibleStableValencies.put(ChemEl.Al, new HashMap()); + possibleStableValencies.put(ChemEl.Si, new HashMap()); + possibleStableValencies.put(ChemEl.P, new HashMap()); + possibleStableValencies.put(ChemEl.S, new HashMap()); + possibleStableValencies.put(ChemEl.Cl, new HashMap()); + possibleStableValencies.put(ChemEl.Ar, new HashMap()); + possibleStableValencies.put(ChemEl.K, new HashMap()); + possibleStableValencies.put(ChemEl.Ca, new HashMap()); + possibleStableValencies.put(ChemEl.Ga, new HashMap()); + possibleStableValencies.put(ChemEl.Ge, new HashMap()); + possibleStableValencies.put(ChemEl.As, new HashMap()); + possibleStableValencies.put(ChemEl.Se, new HashMap()); + possibleStableValencies.put(ChemEl.Br, new HashMap()); + possibleStableValencies.put(ChemEl.Kr, new HashMap()); + possibleStableValencies.put(ChemEl.Rb, new HashMap()); + 
possibleStableValencies.put(ChemEl.Sr, new HashMap()); + possibleStableValencies.put(ChemEl.In, new HashMap()); + possibleStableValencies.put(ChemEl.Sn, new HashMap()); + possibleStableValencies.put(ChemEl.Sb, new HashMap()); + possibleStableValencies.put(ChemEl.Te, new HashMap()); + possibleStableValencies.put(ChemEl.I, new HashMap()); + possibleStableValencies.put(ChemEl.Xe, new HashMap()); + possibleStableValencies.put(ChemEl.Cs, new HashMap()); + possibleStableValencies.put(ChemEl.Ba, new HashMap()); + possibleStableValencies.put(ChemEl.Tl, new HashMap()); + possibleStableValencies.put(ChemEl.Pb, new HashMap()); + possibleStableValencies.put(ChemEl.Bi, new HashMap()); + possibleStableValencies.put(ChemEl.Po, new HashMap()); + possibleStableValencies.put(ChemEl.At, new HashMap()); + possibleStableValencies.put(ChemEl.Rn, new HashMap()); + possibleStableValencies.put(ChemEl.Fr, new HashMap()); + possibleStableValencies.put(ChemEl.Ra, new HashMap()); + + possibleStableValencies.get(ChemEl.H).put(0, new Integer[]{1}); + possibleStableValencies.get(ChemEl.He).put(0, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Li).put(0, new Integer[]{1}); + possibleStableValencies.get(ChemEl.Be).put(0, new Integer[]{2}); + possibleStableValencies.get(ChemEl.B).put(0, new Integer[]{3}); + possibleStableValencies.get(ChemEl.C).put(0, new Integer[]{4}); + possibleStableValencies.get(ChemEl.N).put(0, new Integer[]{3}); + possibleStableValencies.get(ChemEl.O).put(0, new Integer[]{2}); + possibleStableValencies.get(ChemEl.F).put(0, new Integer[]{1}); + possibleStableValencies.get(ChemEl.Ne).put(0, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Na).put(0, new Integer[]{1}); + possibleStableValencies.get(ChemEl.Mg).put(0, new Integer[]{2}); + possibleStableValencies.get(ChemEl.Al).put(0, new Integer[]{3}); + possibleStableValencies.get(ChemEl.Si).put(0, new Integer[]{4}); + possibleStableValencies.get(ChemEl.P).put(0, new Integer[]{3,5}); + 
possibleStableValencies.get(ChemEl.S).put(0, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.Cl).put(0, new Integer[]{1,3,5,7}); + possibleStableValencies.get(ChemEl.Ar).put(0, new Integer[]{0}); + possibleStableValencies.get(ChemEl.K).put(0, new Integer[]{1}); + possibleStableValencies.get(ChemEl.Ca).put(0, new Integer[]{2}); + possibleStableValencies.get(ChemEl.Ga).put(0, new Integer[]{3}); + possibleStableValencies.get(ChemEl.Ge).put(0, new Integer[]{4}); + possibleStableValencies.get(ChemEl.As).put(0, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Se).put(0, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.Br).put(0, new Integer[]{1,3,5,7}); + possibleStableValencies.get(ChemEl.Kr).put(0, new Integer[]{0,2}); + possibleStableValencies.get(ChemEl.Rb).put(0, new Integer[]{1}); + possibleStableValencies.get(ChemEl.Sr).put(0, new Integer[]{2}); + possibleStableValencies.get(ChemEl.In).put(0, new Integer[]{3}); + possibleStableValencies.get(ChemEl.Sn).put(0, new Integer[]{2,4}); + possibleStableValencies.get(ChemEl.Sb).put(0, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Te).put(0, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.I).put(0, new Integer[]{1,3,5,7}); + possibleStableValencies.get(ChemEl.Xe).put(0, new Integer[]{0,2,4,6,8}); + possibleStableValencies.get(ChemEl.Cs).put(0, new Integer[]{1}); + possibleStableValencies.get(ChemEl.Ba).put(0, new Integer[]{2}); + possibleStableValencies.get(ChemEl.Tl).put(0, new Integer[]{1,3}); + possibleStableValencies.get(ChemEl.Pb).put(0, new Integer[]{2,4}); + possibleStableValencies.get(ChemEl.Bi).put(0, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Po).put(0, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.At).put(0, new Integer[]{1,3,5,7}); + possibleStableValencies.get(ChemEl.Rn).put(0, new Integer[]{0,2,4,6,8}); + possibleStableValencies.get(ChemEl.Fr).put(0, new Integer[]{1}); + possibleStableValencies.get(ChemEl.Ra).put(0, new 
Integer[]{2}); + + possibleStableValencies.get(ChemEl.H).put(1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Li).put(1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Be).put(1, new Integer[]{1}); + possibleStableValencies.get(ChemEl.Be).put(2, new Integer[]{0}); + possibleStableValencies.get(ChemEl.B).put(2, new Integer[]{1}); + possibleStableValencies.get(ChemEl.B).put(1, new Integer[]{2}); + possibleStableValencies.get(ChemEl.B).put(-1, new Integer[]{4}); + possibleStableValencies.get(ChemEl.B).put(-2, new Integer[]{3}); + possibleStableValencies.get(ChemEl.C).put(2, new Integer[]{2}); + possibleStableValencies.get(ChemEl.C).put(1, new Integer[]{3}); + possibleStableValencies.get(ChemEl.C).put(-1, new Integer[]{3}); + possibleStableValencies.get(ChemEl.C).put(-2, new Integer[]{2}); + possibleStableValencies.get(ChemEl.N).put(2, new Integer[]{3}); + possibleStableValencies.get(ChemEl.N).put(1, new Integer[]{4}); + possibleStableValencies.get(ChemEl.N).put(-1, new Integer[]{2}); + possibleStableValencies.get(ChemEl.N).put(-2, new Integer[]{1}); + possibleStableValencies.get(ChemEl.O).put(2, new Integer[]{4}); + possibleStableValencies.get(ChemEl.O).put(1, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.O).put(-1, new Integer[]{1}); + possibleStableValencies.get(ChemEl.O).put(-2, new Integer[]{0}); + possibleStableValencies.get(ChemEl.F).put(2, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.F).put(1, new Integer[]{2}); + possibleStableValencies.get(ChemEl.F).put(-1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Na).put(1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Na).put(-1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Mg).put(2, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Al).put(3, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Al).put(2, new Integer[]{1}); + possibleStableValencies.get(ChemEl.Al).put(1, new Integer[]{2}); + 
possibleStableValencies.get(ChemEl.Al).put(-1, new Integer[]{4}); + possibleStableValencies.get(ChemEl.Al).put(-2, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Si).put(2, new Integer[]{2}); + possibleStableValencies.get(ChemEl.Si).put(1, new Integer[]{3}); + possibleStableValencies.get(ChemEl.Si).put(-1, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Si).put(-2, new Integer[]{2}); + possibleStableValencies.get(ChemEl.P).put(2, new Integer[]{3}); + possibleStableValencies.get(ChemEl.P).put(1, new Integer[]{4}); + possibleStableValencies.get(ChemEl.P).put(-1, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.P).put(-2, new Integer[]{1,3,5,7}); + possibleStableValencies.get(ChemEl.S).put(2, new Integer[]{4}); + possibleStableValencies.get(ChemEl.S).put(1, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.S).put(-1, new Integer[]{1,3,5,7}); + possibleStableValencies.get(ChemEl.S).put(-2, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Cl).put(2, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Cl).put(1, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.Cl).put(-1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.K).put(1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.K).put(-1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Ca).put(2, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Ca).put(1, new Integer[]{1}); + possibleStableValencies.get(ChemEl.Ga).put(3, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Ga).put(2, new Integer[]{1}); + possibleStableValencies.get(ChemEl.Ga).put(1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Ga).put(-1, new Integer[]{4}); + possibleStableValencies.get(ChemEl.Ga).put(-2, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Ge).put(4, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Ge).put(1, new Integer[]{3}); + possibleStableValencies.get(ChemEl.Ge).put(-1, new Integer[]{3,5}); + 
possibleStableValencies.get(ChemEl.Ge).put(-2, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.As).put(2, new Integer[]{3}); + possibleStableValencies.get(ChemEl.As).put(1, new Integer[]{4}); + possibleStableValencies.get(ChemEl.As).put(-1, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.As).put(-2, new Integer[]{1,3,5,7}); + possibleStableValencies.get(ChemEl.As).put(-3, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Se).put(2, new Integer[]{4}); + possibleStableValencies.get(ChemEl.Se).put(1, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Se).put(-1, new Integer[]{1,3,5,7}); + possibleStableValencies.get(ChemEl.Se).put(-2, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Br).put(2, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Br).put(1, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.Br).put(-1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Rb).put(1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Rb).put(-1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Sr).put(2, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Sr).put(1, new Integer[]{1}); + possibleStableValencies.get(ChemEl.In).put(3, new Integer[]{0}); + possibleStableValencies.get(ChemEl.In).put(2, new Integer[]{1}); + possibleStableValencies.get(ChemEl.In).put(1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.In).put(-1, new Integer[]{2,4}); + possibleStableValencies.get(ChemEl.In).put(-2, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Sn).put(4, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Sn).put(2, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Sn).put(1, new Integer[]{3}); + possibleStableValencies.get(ChemEl.Sn).put(-1, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Sn).put(-2, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.Sb).put(3, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Sb).put(2, new Integer[]{3}); + 
possibleStableValencies.get(ChemEl.Sb).put(1, new Integer[]{2,4}); + possibleStableValencies.get(ChemEl.Sb).put(-1, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.Sb).put(-2, new Integer[]{1,3,5,7}); + possibleStableValencies.get(ChemEl.Te).put(2, new Integer[]{2,4}); + possibleStableValencies.get(ChemEl.Te).put(1, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Te).put(-1, new Integer[]{1,3,5,7}); + possibleStableValencies.get(ChemEl.Te).put(-2, new Integer[]{0}); + possibleStableValencies.get(ChemEl.I).put(2, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.I).put(1, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.I).put(-1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Cs).put(1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Cs).put(-1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Ba).put(2, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Ba).put(1, new Integer[]{1}); + possibleStableValencies.get(ChemEl.Pb).put(2, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Pb).put(1, new Integer[]{3}); + possibleStableValencies.get(ChemEl.Pb).put(-1, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.Pb).put(-2, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.Bi).put(3, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Bi).put(2, new Integer[]{3}); + possibleStableValencies.get(ChemEl.Bi).put(1, new Integer[]{2,4}); + possibleStableValencies.get(ChemEl.Bi).put(-1, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.Bi).put(-2, new Integer[]{1,3,5,7}); + possibleStableValencies.get(ChemEl.At).put(2, new Integer[]{3,5}); + possibleStableValencies.get(ChemEl.At).put(1, new Integer[]{2,4,6}); + possibleStableValencies.get(ChemEl.At).put(-1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Fr).put(1, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Ra).put(2, new Integer[]{0}); + possibleStableValencies.get(ChemEl.Ra).put(1, new 
Integer[]{1}); } /** - * Given an element symbol (e.g. Na) and charge (e.g. 1) returns the highest stable valency that OPSIN knows is possible - * If for the particular combination of element symbol and charge the highest stable valency is not known null is returned - * @param symbol + * Given a chemical element (e.g. Na) and charge (e.g. 1) returns the highest stable valency that OPSIN knows is possible + * If for the particular combination of chemical element and charge the highest stable valency is not known null is returned + * @param chemEl * @param charge * @return */ - static Integer getMaximumValency(String symbol, int charge) { - if (possibleStableValencies.get(symbol)!=null){ - if (possibleStableValencies.get(symbol).get(charge)!=null){ - return possibleStableValencies.get(symbol).get(charge)[possibleStableValencies.get(symbol).get(charge).length-1]; + static Integer getMaximumValency(ChemEl chemEl, int charge) { + Map possibleStableValenciesForEl = possibleStableValencies.get(chemEl); + if (possibleStableValenciesForEl != null){ + Integer[] possibleStableValenciesForElAndCharge = possibleStableValenciesForEl.get(charge); + if (possibleStableValenciesForElAndCharge != null){ + return possibleStableValenciesForElAndCharge[possibleStableValenciesForElAndCharge.length - 1]; } } return null; } + + /** + * Return the lambda convention derived valency if set otherwise returns the same as {@link #getMaximumValency(ChemEl, int)} + * Returns null if the maximum valency is not known + * @param a + * @return + */ + static Integer getMaximumValency(Atom a) { + Integer maxVal; + if (a.getLambdaConventionValency() != null) { + maxVal = a.getLambdaConventionValency() + a.getProtonsExplicitlyAddedOrRemoved(); + } + else{ + maxVal = getMaximumValency(a.getElement(), a.getCharge()); + } + return maxVal; + } /** * Checks whether the total incoming valency to an atom exceeds its expected valency @@ -307,17 +331,9 @@ */ static boolean checkValency(Atom a) { int valency = 
a.getIncomingValency() + a.getOutValency(); - Integer maxVal; - if (a.getLambdaConventionValency()!=null){ - maxVal=a.getLambdaConventionValency() + a.getProtonsExplicitlyAddedOrRemoved(); - } - else{ - String symbol = a.getElement(); - int charge = a.getCharge(); - maxVal=getMaximumValency(symbol, charge); - if(maxVal==null) { - return true; - } + Integer maxVal = getMaximumValency(a); + if(maxVal == null) { + return true; } return valency <= maxVal; } @@ -329,18 +345,10 @@ * @return */ static boolean checkValencyAvailableForBond(Atom a, int bondOrder) { - int valency =a.getIncomingValency() +bondOrder; - Integer maxVal; - if (a.getLambdaConventionValency()!=null){ - maxVal=a.getLambdaConventionValency() + a.getProtonsExplicitlyAddedOrRemoved(); - } - else{ - String symbol = a.getElement(); - int charge = a.getCharge(); - maxVal = getMaximumValency(symbol, charge); - if(maxVal==null) { - return true; - } + int valency = a.getIncomingValency() + bondOrder; + Integer maxVal = getMaximumValency(a); + if(maxVal == null) { + return true; } return valency <= maxVal; } @@ -348,7 +356,7 @@ /** Check whether changing to a heteroatom will result in valency being exceeded * spareValency and outValency is taken into account * @param a atom you are interested in - * @param the heteroatom atom which will be replacing it + * @param heteroatom atom which will be replacing it * @return */ static boolean checkValencyAvailableForReplacementByHeteroatom(Atom a, Atom heteroatom) { @@ -361,31 +369,34 @@ /** * Returns the default valency of an element when uncharged or null if unknown - * @param element + * @param chemlEl * @return */ - static Integer getDefaultValency(String element) { - return expectedDefaultValency.get(element); + static Integer getDefaultValency(ChemEl chemlEl) { + return expectedDefaultValency.get(chemlEl); } /** * Returns the valency of an element in the HW system (useful for deciding whether something should have double bonds in a ring) or null if unknown * Note 
that the HW system makes no claim about valency when the atom is charged - * @param element + * @param chemEl * @return */ - static Integer getHWValency(String element) { - return valencyInHW.get(element); + static Integer getHWValency(ChemEl chemEl) { + return valencyInHW.get(chemEl); } /** - * Returns the maximum valency of an element or null if unknown - * @param element + * Returns the maximum valency of an element with a given charge or null if unknown + * @param chemEl * @param charge * @return */ - static Integer[] getPossibleValencies(String element, int charge) { - if (possibleStableValencies.get(element)==null){return null;} - return possibleStableValencies.get(element).get(charge); + static Integer[] getPossibleValencies(ChemEl chemEl, int charge) { + Map possibleStableValenciesForEl = possibleStableValencies.get(chemEl); + if (possibleStableValenciesForEl == null){ + return null; + } + return possibleStableValenciesForEl.get(charge); } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/WordRule.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/WordRule.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/WordRule.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/WordRule.java 2017-07-23 20:55:18.000000000 +0000 @@ -11,8 +11,9 @@ additionCompound, acidHalideOrPseudoHalide, acidReplacingFunctionalGroup, + amineDiConjunctiveSuffix, anhydride, - potentialBiochemicalEster, + potentialAlcoholEster, carbonylDerivative, cyclicPeptide, divalentFunctionalGroup, diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/WordRules.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/WordRules.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/WordRules.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/WordRules.java 2017-07-23 20:55:18.000000000 +0000 @@ 
-2,13 +2,14 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Locale; import java.util.regex.Pattern; - -import nu.xom.Attribute; -import nu.xom.Element; -import nu.xom.Elements; +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; @@ -19,92 +20,120 @@ */ class WordRules { + /**The wordRules themselves.*/ + private final List wordRuleList; + + enum EndsWithGroup { + acid, + ateGroup; + } + + private static final Pattern icOrOusAcid = Pattern.compile("(ic|ous)([ ]?acid)?$"); + private static final Pattern ateOrIteOrAmide = Pattern.compile("(at|it|amid)e?$"); + /** * Describes a word that a wordRule is looking for * @author dl387 * */ - static class WordDescription { + private static class WordDescription { /**Whether the word is a full word, substituent word or functionalTerm word*/ private final WordType type; - + + /**A group with a hardcoded method for efficient detection */ + private final EndsWithGroup endsWithGroup; + /**A case insensitive pattern which attempts to match the end of the String value of the word*/ - private Pattern endsWithPattern = null; + private final Pattern endsWithPattern; /**The case insensitive String value of the word */ - private String value = null; + private final String value; /** Only applicable for functionalTerms. The string value of the functionalTerm's type attribute*/ - private String functionalGroupType = null; - - /** The value of the value attribute of the last group element in the word e.g. maybe a SMILES string*/ - private String endsWithGroupValueAtr = null; + private final String functionalGroupType; /** The value of the type attribute of the last group element in the word e.g. 
maybe aminoAcid*/ - private String endsWithGroupType = null; + private final String endsWithGroupType; /** The value of the subType attribute of the last group element in the word e.g. maybe elementaryAtom*/ - private String endsWithGroupSubType = null; + private final String endsWithGroupSubType; + + /** + * Makes a description of a word to looks for + * @param reader + */ + WordDescription(XMLStreamReader reader){ + WordType type = null; + String value = null; + EndsWithGroup endsWithGroup = null; + Pattern endsWithPattern = null; + String functionalGroupType = null; + String endsWithGroupType = null; + String endsWithGroupSubType = null; + for (int i = 0, l = reader.getAttributeCount(); i < l; i++) { + String atrName = reader.getAttributeLocalName(i); + String atrValue = reader.getAttributeValue(i); + if (atrName.equals("type")){ + type = WordType.valueOf(atrValue); + } + else if (atrName.equals("value")){ + value = atrValue; + } + else if (atrName.equals("functionalGroupType")){ + functionalGroupType = atrValue; + } + else if (atrName.equals("endsWith")){ + endsWithGroup = EndsWithGroup.valueOf(atrValue); + } + else if (atrName.equals("endsWithRegex")){ + endsWithPattern = Pattern.compile(atrValue +"$", Pattern.CASE_INSENSITIVE); + } + else if (atrName.equals("endsWithGroupType")){ + endsWithGroupType = atrValue; + } + else if (atrName.equals("endsWithGroupSubType")){ + endsWithGroupSubType = atrValue; + } + } + if (type == null) { + throw new RuntimeException("Malformed wordRules"); + } + this.type = type; + this.endsWithGroup = endsWithGroup; + this.endsWithPattern = endsWithPattern; + this.value = value; + this.functionalGroupType = functionalGroupType; + this.endsWithGroupType = endsWithGroupType; + this.endsWithGroupSubType = endsWithGroupSubType; + } WordType getType() { return type; } + + EndsWithGroup getEndsWithGroup() { + return endsWithGroup; + } Pattern getEndsWithPattern() { return endsWithPattern; } - void setEndsWithPattern(Pattern 
endsWithPattern) { - this.endsWithPattern = endsWithPattern; - } - String getValue() { return value; } - void setValue(String value) { - this.value = value.toLowerCase(); - } - String getFunctionalGroupType() { return functionalGroupType; } - void setFunctionalGroupType(String functionalGroupType) { - this.functionalGroupType = functionalGroupType; - } - - String getEndsWithGroupValueAtr() { - return endsWithGroupValueAtr; - } - - void setEndsWithGroupValueAtr(String endsWithElementValueAtr) { - this.endsWithGroupValueAtr = endsWithElementValueAtr; - } - String getEndsWithGroupType() { return endsWithGroupType; } - - void setEndsWithGroupType(String endsWithElementType) { - this.endsWithGroupType = endsWithElementType; - } String getEndsWithGroupSubType() { return endsWithGroupSubType; } - - void setEndsWithGroupSubType(String endsWithElementSubType) { - this.endsWithGroupSubType = endsWithElementSubType; - } - - /** - * Makes a description of a word to looks for - * @param wordType - */ - WordDescription(WordType wordType){ - type =wordType; - } } /** @@ -112,10 +141,14 @@ * @author dl387 * */ - static class WordRuleDescription { - private final List wordDescriptions = new ArrayList(); + private static class WordRuleDescription { + private final List wordDescriptions; private final WordRule ruleName; private final WordType ruleType; + + List getWordDescriptions() { + return wordDescriptions; + } WordRule getRuleName() { return ruleName; @@ -127,35 +160,27 @@ /** * Creates a wordRule from a wordRule element found in wordRules.xml - * @param wordRuleEl + * @param reader + * @throws XMLStreamException */ - WordRuleDescription(Element wordRuleEl) { - ruleName = WordRule.valueOf(wordRuleEl.getAttributeValue("name")); - ruleType = WordType.valueOf(wordRuleEl.getAttributeValue("type")); - Elements words = wordRuleEl.getChildElements(); - for (int i = 0; i < words.size(); i++) { - Element word = words.get(i); - WordDescription wd = new 
WordDescription(WordType.valueOf(word.getAttributeValue("type"))); - if (word.getAttribute("value")!=null){ - wd.setValue(word.getAttributeValue("value")); - } - if (word.getAttribute("functionalGroupType")!=null){ - wd.setFunctionalGroupType(word.getAttributeValue("functionalGroupType")); - } - if (word.getAttribute("endsWithRegex")!=null){ - wd.setEndsWithPattern(Pattern.compile(word.getAttributeValue("endsWithRegex") +"$", Pattern.CASE_INSENSITIVE)); - } - if (word.getAttribute("endsWithGroupValueAtr")!=null){ - wd.setEndsWithGroupValueAtr(word.getAttributeValue("endsWithGroupValueAtr")); - } - if (word.getAttribute("endsWithGroupType")!=null){ - wd.setEndsWithGroupType(word.getAttributeValue("endsWithGroupType")); + WordRuleDescription(XMLStreamReader reader) throws XMLStreamException { + List wordDescriptions = new ArrayList(); + ruleName = WordRule.valueOf(reader.getAttributeValue(null, "name")); + ruleType = WordType.valueOf(reader.getAttributeValue(null,"type")); + while (reader.hasNext()) { + int event = reader.next(); + if (event == XMLStreamConstants.START_ELEMENT) { + if (reader.getLocalName().equals("word")) { + wordDescriptions.add(new WordDescription(reader)); + } } - if (word.getAttribute("endsWithGroupSubType")!=null){ - wd.setEndsWithGroupSubType(word.getAttributeValue("endsWithGroupSubType")); + else if (event == XMLStreamConstants.END_ELEMENT) { + if (reader.getLocalName().equals("wordRule")) { + break; + } } - wordDescriptions.add(wd); } + this.wordDescriptions = Collections.unmodifiableList(wordDescriptions); } } @@ -164,315 +189,603 @@ * @param resourceGetter * @throws IOException */ - WordRules(ResourceGetter resourceGetter) throws IOException{ - Element wordRules =resourceGetter.getXMLDocument("wordRules.xml").getRootElement(); - Elements rules = wordRules.getChildElements("wordRule"); - for (int i = 0; i < rules.size(); i++) { - wordRuleList.add (new WordRuleDescription(rules.get(i))); + WordRules(ResourceGetter resourceGetter) throws 
IOException { + List wordRuleList = new ArrayList(); + XMLStreamReader reader = resourceGetter.getXMLStreamReader("wordRules.xml"); + try { + while (reader.hasNext()) { + if (reader.next() == XMLStreamConstants.START_ELEMENT && + reader.getLocalName().equals("wordRule")) { + wordRuleList.add(new WordRuleDescription(reader)); + } + } } + catch (XMLStreamException e) { + throw new IOException("Parsing exception occurred while reading wordRules.xml", e); + } + finally { + try { + reader.close(); + } catch (XMLStreamException e) { + throw new IOException("Parsing exception occurred while reading wordRules.xml", e); + } + } + this.wordRuleList = Collections.unmodifiableList(wordRuleList); } - /**The wordRules themselves.*/ - private final List wordRuleList = new ArrayList(); - /**Takes a molecule element and places the word elements into wordRule elements - * @param n2sConfig - * * @param moleculeEl A molecule element with word children + * @param n2sConfig * @param allowSpaceRemoval + * @param componentRatios * @throws ParsingException */ - void groupWordsIntoWordRules(NameToStructureConfig n2sConfig, Element moleculeEl, boolean allowSpaceRemoval) throws ParsingException { - List wordEls = XOMTools.getChildElementsWithTagName(moleculeEl, WORD_EL); + void groupWordsIntoWordRules(Element moleculeEl, NameToStructureConfig n2sConfig, boolean allowSpaceRemoval, Integer[] componentRatios) throws ParsingException { + WordRulesInstance instance = new WordRulesInstance(moleculeEl, n2sConfig, allowSpaceRemoval, componentRatios); + List wordEls = moleculeEl.getChildElements(WORD_EL); //note that multiple words in wordEls may be later replaced by a wordRule element for (int i = 0; i wordRuleEls = moleculeEl.getChildElements(); + for (Element wordRuleEl : wordRuleEls) { + if (!wordRuleEl.getName().equals(WORDRULE_EL)){ + throw new ParsingException("Unable to assign wordRule to: " + wordRuleEl.getAttributeValue(VALUE_ATR)); } } } + + private class WordRulesInstance { + private final 
Element moleculeEl; + private final boolean allowRadicals; + private final boolean allowSpaceRemoval; + private final Integer expectedNumOfComponents; + + WordRulesInstance(Element moleculeEl, NameToStructureConfig n2sConfig, boolean allowSpaceRemoval, Integer[] componentRatios) { + this.moleculeEl = moleculeEl; + this.allowRadicals = n2sConfig.isAllowRadicals(); + this.allowSpaceRemoval = allowSpaceRemoval; + this.expectedNumOfComponents = componentRatios != null ? componentRatios.length : null; + } + + private boolean matchWordRule(List wordEls, int indexOfFirstWord) throws ParsingException { + wordRuleLoop: for (WordRuleDescription wordRuleDesc : wordRuleList) { + int i = indexOfFirstWord; + List wordDescriptions = wordRuleDesc.getWordDescriptions(); + int wordsInWordRule = wordDescriptions.size(); + if (i + wordsInWordRule <= wordEls.size()) {//need sufficient words to match the word rule + for (int j = 0; j < wordsInWordRule; j++) { + Element wordEl = wordEls.get(i + j); + WordDescription wd = wordDescriptions.get(j); + if (!wd.getType().toString().equals(wordEl.getAttributeValue(TYPE_ATR))){ + continue wordRuleLoop;//type mismatch; + } - private boolean matchWordRule(NameToStructureConfig n2sConfig, List wordEls, int indexOfFirstWord, boolean allowSpaceRemoval) throws ParsingException { - wordRuleLoop: for (WordRuleDescription wordRuleDesc : wordRuleList) { - int i =indexOfFirstWord; - int wordsInWordRule = wordRuleDesc.wordDescriptions.size(); - if (i + wordsInWordRule -1 < wordEls.size()){//need sufficient words to match the word rule - for (int j = 0; j < wordsInWordRule; j++) { - Element wordEl = wordEls.get(i+j); - WordDescription wd = wordRuleDesc.wordDescriptions.get(j); - if (!wd.type.toString().equals(wordEl.getAttributeValue(TYPE_ATR))){ - continue wordRuleLoop;//type mismatch; - } - if (wd.getValue() !=null && !wordEl.getAttributeValue(VALUE_ATR).toLowerCase().equals(wd.getValue())){//word string contents mismatch - continue wordRuleLoop; - } - if 
(wd.functionalGroupType !=null){ - if (WordType.functionalTerm.toString().equals(wordEl.getAttributeValue(TYPE_ATR))){ - Elements children = wordEl.getChildElements(); - Element lastChild = children.get(children.size()-1); - while (lastChild.getChildElements().size()!=0){ - children = lastChild.getChildElements(); - lastChild = children.get(children.size()-1); - } - if (lastChild.getLocalName().equals(CLOSEBRACKET_EL)){ - lastChild = (Element) XOMTools.getPreviousSibling(lastChild); + String functionalGroupTypePredicate = wd.getFunctionalGroupType(); + if (functionalGroupTypePredicate != null) { + if (!WordType.functionalTerm.toString().equals(wordEl.getAttributeValue(TYPE_ATR))){ + continue wordRuleLoop; } - if (lastChild==null){ + Element lastEl = getLastElementInWord(wordEl); + if (lastEl == null) { throw new ParsingException("OPSIN Bug: Cannot find the functional element in a functionalTerm"); } - if (!wd.getFunctionalGroupType().equals(lastChild.getAttributeValue(TYPE_ATR))){ + while (lastEl.getName().equals(CLOSEBRACKET_EL) || lastEl.getName().equals(STRUCTURALCLOSEBRACKET_EL)) { + lastEl = OpsinTools.getPreviousSibling(lastEl); + if (lastEl == null) { + throw new ParsingException("OPSIN Bug: Cannot find the functional element in a functionalTerm"); + } + } + if (!functionalGroupTypePredicate.equals(lastEl.getAttributeValue(TYPE_ATR))) { continue wordRuleLoop; } } - } - if (wd.endsWithPattern !=null){ - if (!wd.endsWithPattern.matcher(wordEl.getAttributeValue(VALUE_ATR)).find()){ + + EndsWithGroup endsWithGroupPredicate = wd.getEndsWithGroup(); + if (endsWithGroupPredicate != null && !endsWithGroupPredicateSatisfied(wordEl, endsWithGroupPredicate)) { continue wordRuleLoop; } - } - if (wd.endsWithGroupValueAtr !=null){ - Element lastGroupInWordRule = getLastGroupInWordRule(wordEl); - if (lastGroupInWordRule==null || !wd.endsWithGroupValueAtr.equals(lastGroupInWordRule.getAttributeValue(VALUE_ATR))){ + + String valuePredicate = wd.getValue(); + if 
(valuePredicate != null && !wordEl.getAttributeValue(VALUE_ATR).toLowerCase(Locale.ROOT).equals(valuePredicate)){//word string contents mismatch continue wordRuleLoop; } - } - if (wd.endsWithGroupType !=null){ - Element lastGroupInWordRule = getLastGroupInWordRule(wordEl); - if (lastGroupInWordRule==null || !wd.endsWithGroupType.equals(lastGroupInWordRule.getAttributeValue(TYPE_ATR))){ - continue wordRuleLoop; + + Pattern endsWithPatternPredicate = wd.getEndsWithPattern(); + if (endsWithPatternPredicate != null) { + if (!endsWithPatternPredicate.matcher(wordEl.getAttributeValue(VALUE_ATR)).find()){ + continue wordRuleLoop; + } + } + + String endsWithGroupTypePredicate = wd.getEndsWithGroupType(); + if (endsWithGroupTypePredicate != null) { + Element lastGroupInWordRule = getLastGroupInWordRule(wordEl); + if (lastGroupInWordRule == null || !endsWithGroupTypePredicate.equals(lastGroupInWordRule.getAttributeValue(TYPE_ATR))){ + continue wordRuleLoop; + } + } + + String endsWithSubGroupTypePredicate = wd.getEndsWithGroupSubType(); + if (endsWithSubGroupTypePredicate != null) { + Element lastGroupInWordRule = getLastGroupInWordRule(wordEl); + if (lastGroupInWordRule == null || !endsWithSubGroupTypePredicate.equals(lastGroupInWordRule.getAttributeValue(SUBTYPE_ATR))){ + continue wordRuleLoop; + } } } - if (wd.endsWithGroupSubType !=null){ - Element lastGroupInWordRule = getLastGroupInWordRule(wordEl); - if (lastGroupInWordRule==null || !wd.endsWithGroupSubType.equals(lastGroupInWordRule.getAttributeValue(SUBTYPE_ATR))){ + //Word Rule matches! 
+ Element wordRuleEl = new GroupingEl(WORDRULE_EL); + WordRule wordRule = wordRuleDesc.getRuleName(); + wordRuleEl.addAttribute(new Attribute(TYPE_ATR, wordRuleDesc.getRuleType().toString())); + wordRuleEl.addAttribute(new Attribute(WORDRULE_EL, wordRule.toString())); + + /* + * Some wordRules can not be entirely processed at the structure building stage + */ + switch (wordRule) { + case functionGroupAsGroup: + //convert the functional term into a full term + Element functionalWord = wordEls.get(i + wordsInWordRule -1); + if (!functionalWord.getAttributeValue(TYPE_ATR).equals(FUNCTIONALTERM_EL) || wordsInWordRule>2){ + throw new ParsingException("OPSIN bug: Problem with functionGroupAsGroup wordRule"); + } + convertFunctionalGroupIntoGroup(functionalWord); + if (wordsInWordRule==2){ + joinWords(wordEls, wordEls.get(i), functionalWord); + wordsInWordRule =1; + } + wordRuleEl.getAttribute(WORDRULE_ATR).setValue(WordRule.simple.toString()); + break; + case carbonylDerivative: + case acidReplacingFunctionalGroup: + //e.g. acetone 4,4-diphenylsemicarbazone. 
This is better expressed as a full word as the substituent actually locants onto the functional term + for (int j = 1; j < (wordsInWordRule - 1); j++) { + Element wordEl = wordEls.get(i + j); + if (WordType.substituent.toString().equals(wordEl.getAttributeValue(TYPE_ATR))) { + joinWords(wordEls, wordEls.get(i + j), wordEls.get(i + j + 1)); + wordsInWordRule--; + List functionalTerm = OpsinTools.getDescendantElementsWithTagName(wordEls.get(i + j), FUNCTIONALTERM_EL);//rename functionalTerm element to root + if (functionalTerm.size() != 1){ + throw new ParsingException("OPSIN bug: Problem with "+ wordRule +" wordRule"); + } + functionalTerm.get(0).setName(ROOT_EL); + List functionalGroups = OpsinTools.getDescendantElementsWithTagName(functionalTerm.get(0), FUNCTIONALGROUP_EL);//rename functionalGroup element to group + if (functionalGroups.size() != 1){ + throw new ParsingException("OPSIN bug: Problem with "+ wordRule +" wordRule"); + } + functionalGroups.get(0).setName(GROUP_EL); + wordEls.get(i + j).getAttribute(TYPE_ATR).setValue(WordType.full.toString()); + } + } + break; + case additionCompound: + case oxide: + //is the halide/pseudohalide/oxide actually a counterion rather than covalently bonded + Element possibleElementaryAtomContainingWord = wordEls.get(i); + List elementaryAtoms = OpsinTools.getDescendantElementsWithTagNameAndAttribute(possibleElementaryAtomContainingWord, GROUP_EL, SUBTYPE_ATR, ELEMENTARYATOM_SUBTYPE_VAL); + if (elementaryAtoms.size() == 1) { + Element elementaryAtom = elementaryAtoms.get(0); + ChemEl chemEl1 = getChemElFromElementaryAtomEl(elementaryAtom); + if (wordRule == WordRule.oxide) { + if (wordsInWordRule != 2){ + throw new ParsingException("OPSIN bug: Problem with "+ wordRule +" wordRule"); + } + Element oxideWord = wordEls.get(i + 1); + ChemEl chemEl2 = getChemElFromWordWithFunctionalGroup(oxideWord); + if (!FragmentTools.isCovalent(chemEl1, chemEl2)){ + Element oxideGroup = convertFunctionalGroupIntoGroup(oxideWord); + 
setOxideStructureAppropriately(oxideGroup, elementaryAtom); + applySimpleWordRule(wordEls, indexOfFirstWord, possibleElementaryAtomContainingWord); + continue wordRuleLoop; + } + } + else { + for (int j = 1; j < wordsInWordRule; j++) { + Element functionalGroup = wordEls.get(i + j); + ChemEl chemEl2 = getChemElFromWordWithFunctionalGroup(functionalGroup); + if (!FragmentTools.isCovalent(chemEl1, chemEl2)) {//use separate word rules for ionic components + boolean specialCaseCovalency = false; + if (chemEl2.isHalogen() && wordsInWordRule == 2) { + switch (chemEl1) { + case Mg: + if (possibleElementaryAtomContainingWord.getChildCount() > 1) { + //treat grignards (i.e. substitutedmagnesium halides) as covalent + specialCaseCovalency = true; + } + break; + case Al: + if (chemEl2 == ChemEl.Cl || chemEl2 == ChemEl.Br || chemEl2 == ChemEl.I) { + specialCaseCovalency = true; + } + break; + case Ti: + if (oxidationNumberOrMultiplierIs(elementaryAtom, functionalGroup, 4) && + (chemEl2 == ChemEl.Cl || chemEl2 == ChemEl.Br || chemEl2 == ChemEl.I)) { + specialCaseCovalency = true; + } + break; + case V: + if (oxidationNumberOrMultiplierIs(elementaryAtom, functionalGroup, 4) && + chemEl2 == ChemEl.Cl) { + specialCaseCovalency = true; + } + break; + case Zr: + case Hf: + if (oxidationNumberOrMultiplierIs(elementaryAtom, functionalGroup, 4) && + chemEl2 == ChemEl.Br) { + specialCaseCovalency = true; + } + break; + case U: + if (oxidationNumberOrMultiplierIs(elementaryAtom, functionalGroup, 6) && + (chemEl2 == ChemEl.F || chemEl2 == ChemEl.Cl)) { + specialCaseCovalency = true; + } + break; + case Np: + case Pu: + if (oxidationNumberOrMultiplierIs(elementaryAtom, functionalGroup, 6) && + chemEl2 == ChemEl.F) { + specialCaseCovalency = true; + } + break; + default: + break; + } + } + else if (chemEl2 == ChemEl.H && wordsInWordRule == 2) { + if (chemEl1 == ChemEl.Al) { + //aluminium hydrides are covalent + specialCaseCovalency = true; + } + } + if (!specialCaseCovalency) { + continue 
wordRuleLoop; + } + } + } + } + } + break; + case potentialAlcoholEster: + if (expectedNumOfComponents != null && expectedNumOfComponents == moleculeEl.getChildCount()) { + //don't apply this wordRule if doing so makes the number of components incorrect continue wordRuleLoop; } - } + break; + default: + break; + } + + List wordValues = new ArrayList(); + Element parentEl = wordEls.get(i).getParent(); + int indexToInsertAt = parentEl.indexOf(wordEls.get(i)); + for (int j = 0; j < wordsInWordRule; j++) { + Element wordEl = wordEls.remove(i); + wordEl.detach(); + wordRuleEl.addChild(wordEl); + wordValues.add(wordEl.getAttributeValue(VALUE_ATR)); + } + wordRuleEl.addAttribute(new Attribute(VALUE_ATR, StringTools.stringListToString(wordValues, " ")));//The bare string of all the words under this wordRule + parentEl.insertChild(wordRuleEl, indexToInsertAt); + wordEls.add(i, wordRuleEl); + return true; } - //Word Rule matches! - Element wordRuleEl = new Element(WORDRULE_EL); - wordRuleEl.addAttribute(new Attribute(TYPE_ATR, wordRuleDesc.getRuleType().toString())); - wordRuleEl.addAttribute(new Attribute(WORDRULE_EL, wordRuleDesc.getRuleName().toString())); - + } + Element firstWord = wordEls.get(indexOfFirstWord); + if (firstWord.getName().equals(WORD_EL) && WordType.full.toString().equals(firstWord.getAttributeValue(TYPE_ATR))){//No wordRule -->wordRule="simple" + applySimpleWordRule(wordEls, indexOfFirstWord, firstWord); + return false; + } + else if (allowSpaceRemoval && WordType.substituent.toString().equals(firstWord.getAttributeValue(TYPE_ATR))){ /* - * Some wordRules can not be entirely processed at the structure building stage + * substituents may join together or to a full e.g. 
2-ethyl toluene -->2-ethyltoluene + * 1-chloro 2-bromo ethane --> 1-chloro-2-bromo ethane then subsequently 1-chloro-2-bromo-ethane */ - WordRule wordRule = wordRuleDesc.getRuleName(); - if (wordRule == WordRule.functionGroupAsGroup){//convert the functional term into a full term - Element functionalWord = wordEls.get(i + wordsInWordRule -1); - if (!functionalWord.getAttributeValue(TYPE_ATR).equals(FUNCTIONALTERM_EL) || wordsInWordRule>2){ - throw new ParsingException("OPSIN bug: Problem with functionGroupAsGroup wordRule"); - } - convertFunctionalGroupIntoGroup(functionalWord); - if (wordsInWordRule==2){ - joinWords(wordEls, i, wordEls.get(i), functionalWord); - wordsInWordRule =1; - } - wordRuleEl.getAttribute(WORDRULE_ATR).setValue(WordRule.simple.toString()); - } - else if (wordRule == WordRule.carbonylDerivative || wordRule == WordRule.acidReplacingFunctionalGroup){//e.g. acetone 4,4-diphenylsemicarbazone. This is better expressed as a full word as the substituent actually locants onto the functional term - if (wordsInWordRule==3){//substituent present - joinWords(wordEls, i+1, wordEls.get(i+1), wordEls.get(i+2)); - wordsInWordRule--; - List functionalTerm = XOMTools.getDescendantElementsWithTagName(wordEls.get(i+1), FUNCTIONALTERM_EL);//rename functionalTerm element to root - if (functionalTerm.size()!=1){ - throw new ParsingException("OPSIN bug: Problem with "+ wordRule +" wordRule"); - } - functionalTerm.get(0).setLocalName(ROOT_EL); - List functionalGroups = XOMTools.getDescendantElementsWithTagName(functionalTerm.get(0), FUNCTIONALGROUP_EL);//rename functionalGroup element to group - if (functionalGroups.size()!=1){ - throw new ParsingException("OPSIN bug: Problem with "+ wordRule +" wordRule"); - } - functionalGroups.get(0).setLocalName(GROUP_EL); - wordEls.get(i+1).getAttribute(TYPE_ATR).setValue(WordType.full.toString()); - } - } - else if (wordRule == WordRule.additionCompound || wordRule == WordRule.oxide){//is the halide/pseudohalide/oxide actually 
a counterion rather than covalently bonded - Element possibleElementaryAtom = wordEls.get(i); - List elementaryAtoms = XOMTools.getDescendantElementsWithTagNameAndAttribute(possibleElementaryAtom, GROUP_EL, SUBTYPE_ATR, ELEMENTARYATOM_SUBTYPE_VAL); - if (elementaryAtoms.size()==1){ - for (int j = 1; j < wordsInWordRule; j++) { - if (bondWillBeIonic(elementaryAtoms.get(0), wordEls.get(i+j))){//use separate word rules for ionic components - continue wordRuleLoop; - } - } + if (indexOfFirstWord +1 < wordEls.size()){ + Element wordToPotentiallyCombineWith = wordEls.get(indexOfFirstWord +1); + if (WordType.full.toString().equals(wordToPotentiallyCombineWith.getAttributeValue(TYPE_ATR)) || + WordType.substituent.toString().equals(wordToPotentiallyCombineWith.getAttributeValue(TYPE_ATR))){ + joinWords(wordEls, firstWord, wordToPotentiallyCombineWith); + return true; } } + } + else if (WordType.functionalTerm.toString().equals(firstWord.getAttributeValue(TYPE_ATR)) && firstWord.getAttributeValue(VALUE_ATR).toLowerCase(Locale.ROOT).equals("salt")) { + wordEls.remove(indexOfFirstWord); + firstWord.detach(); + if (moleculeEl.getAttribute(ISSALT_ATR) == null) { + moleculeEl.addAttribute(ISSALT_ATR, "yes"); + } + return true; + } + if (allowRadicals && wordEls.size() == 1 && indexOfFirstWord == 0 && firstWord.getName().equals(WORD_EL) && WordType.substituent.toString().equals(firstWord.getAttributeValue(TYPE_ATR))){ + //name is all one substituent, make this a substituent and finish + applySubstituentWordRule(wordEls, indexOfFirstWord, firstWord); + } + return false; + } + private boolean endsWithGroupPredicateSatisfied(Element wordEl, EndsWithGroup endsWithGroupPredicate) throws ParsingException { + Element lastEl = getLastElementInWord(wordEl); + if (lastEl == null) { + return false; + } + String elName = lastEl.getName(); + while (elName.equals(CLOSEBRACKET_EL) || + elName.equals(STRUCTURALCLOSEBRACKET_EL) || + elName.equals(ISOTOPESPECIFICATION_EL)) { + lastEl = 
OpsinTools.getPreviousSibling(lastEl); + if (lastEl == null) { + return false; + } + elName = lastEl.getName(); + } - - List wordValues = new ArrayList(); - Element parentEl = (Element) wordEls.get(i).getParent(); - int indexToInsertAt = parentEl.indexOf(wordEls.get(i)); - for (int j = 0; j < wordsInWordRule; j++) { - Element wordEl = wordEls.remove(i); - wordEl.detach(); - wordRuleEl.appendChild(wordEl); - wordValues.add(wordEl.getAttributeValue(VALUE_ATR)); - } - wordRuleEl.addAttribute(new Attribute(VALUE_ATR, StringTools.stringListToString(wordValues, " ")));//The bare string of all the words under this wordRule - parentEl.insertChild(wordRuleEl, indexToInsertAt); - wordEls.add(i, wordRuleEl); - return true; + if (endsWithGroupPredicate == EndsWithGroup.acid) { + if (elName.equals(SUFFIX_EL)) { + if (icOrOusAcid.matcher(lastEl.getAttributeValue(VALUE_ATR)).find()) { + return true; + } + } + else if (elName.equals(GROUP_EL)) { + if (lastEl.getAttribute(FUNCTIONALIDS_ATR) != null && icOrOusAcid.matcher(lastEl.getValue()).find()) { + return true; + } + } } + else if (endsWithGroupPredicate == EndsWithGroup.ateGroup) { + if (elName.equals(GROUP_EL)) { + if (lastEl.getAttribute(FUNCTIONALIDS_ATR) != null && ateOrIteOrAmide.matcher(lastEl.getValue()).find()) { + return true; + } + } + else { + while (lastEl != null && elName.equals(SUFFIX_EL)) { + String suffixValAtr = lastEl.getAttributeValue(VALUE_ATR); + if (ateOrIteOrAmide.matcher(suffixValAtr).find() || suffixValAtr.equals("glycoside")) { + return true; + } + //glycoside is not always the last suffix + lastEl = OpsinTools.getPreviousSibling(lastEl, SUFFIX_EL); + } + } + } + return false; } - Element firstWord = wordEls.get(indexOfFirstWord); - if (firstWord.getLocalName().equals(WORD_EL) && WordType.full.toString().equals(firstWord.getAttributeValue(TYPE_ATR))){//No wordRule -->wordRule="simple" - applySimpleWordRule(wordEls, indexOfFirstWord, firstWord); + + private boolean oxidationNumberOrMultiplierIs(Element 
elementaryAtomEl, Element functionalGroupWord, int expectedVal) throws ParsingException { + List functionalGroups = OpsinTools.getDescendantElementsWithTagName(functionalGroupWord, FUNCTIONALGROUP_EL); + if (functionalGroups.size() != 1) { + throw new ParsingException("OPSIN bug: Unable to find functional group in oxide or addition compound rule"); + } + Element possibleMultiplier = OpsinTools.getPreviousSibling(functionalGroups.get(0)); + if (possibleMultiplier != null && possibleMultiplier.getName().equals(MULTIPLIER_EL)) { + return Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR)) == expectedVal; + + } + else { + Element possibleOxidationNumber = OpsinTools.getNextSibling(elementaryAtomEl); + if(possibleOxidationNumber != null && possibleOxidationNumber.getName().equals(OXIDATIONNUMBERSPECIFIER_EL)) { + return Integer.parseInt(possibleOxidationNumber.getAttributeValue(VALUE_ATR)) == expectedVal; + } + } return false; } - else if (allowSpaceRemoval && WordType.substituent.toString().equals(firstWord.getAttributeValue(TYPE_ATR))){ - /* - * substituents may join together or to a full e.g. 
2-ethyl toluene -->2-ethyltoluene - * 1-chloro 2-bromo ethane --> 1-chloro-2-bromo ethane then subsequently 1-chloro-2-bromo-ethane - */ - if (indexOfFirstWord +1 < wordEls.size()){ - Element wordToPotentiallyCombineWith = wordEls.get(indexOfFirstWord +1); - if (WordType.full.toString().equals(wordToPotentiallyCombineWith.getAttributeValue(TYPE_ATR)) || - WordType.substituent.toString().equals(wordToPotentiallyCombineWith.getAttributeValue(TYPE_ATR))){ - joinWords(wordEls, indexOfFirstWord, firstWord, wordToPotentiallyCombineWith); - return true; + + private Element getLastGroupInWordRule(Element wordEl) { + Element lastEl = getLastElementInWord(wordEl); + if (lastEl.getName().equals(GROUP_EL)) { + return lastEl; + } + else{ + List groups = lastEl.getParent().getChildElements(GROUP_EL); + if (groups.size() > 0) { + return groups.get(groups.size() - 1); } } + return null; } - if (n2sConfig.isAllowRadicals() && wordEls.size()==1 && indexOfFirstWord==0 && firstWord.getLocalName().equals(WORD_EL) && WordType.substituent.toString().equals(firstWord.getAttributeValue(TYPE_ATR))){ - applySubstituentWordRule(wordEls, indexOfFirstWord, firstWord); + + private Element getLastElementInWord(Element wordEl) { + List children = wordEl.getChildElements(); + Element lastChild = children.get(children.size() - 1); + while (lastChild.getChildCount() != 0) { + children = lastChild.getChildElements(); + lastChild = children.get(children.size() - 1); + } + return lastChild; } - return false; - } - private Element getLastGroupInWordRule(Element wordEl) { - Elements children = wordEl.getChildElements(); - Element lastChild = children.get(children.size()-1); - while (lastChild.getChildElements().size()!=0){ - children = lastChild.getChildElements(); - lastChild = children.get(children.size()-1); + private void applySimpleWordRule(List wordEls, int indexOfFirstWord, Element firstWord) { + Element parentEl = firstWord.getParent(); + int indexToInsertAt = parentEl.indexOf(firstWord); + 
Element wordRuleEl = new GroupingEl(WORDRULE_EL); + wordRuleEl.addAttribute(new Attribute(WORDRULE_ATR, WordRule.simple.toString()));//No wordRule + wordRuleEl.addAttribute(new Attribute(TYPE_ATR, WordType.full.toString())); + wordRuleEl.addAttribute(new Attribute(VALUE_ATR, firstWord.getAttributeValue(VALUE_ATR))); + firstWord.detach(); + wordRuleEl.addChild(firstWord); + wordEls.set(indexOfFirstWord, wordRuleEl); + parentEl.insertChild(wordRuleEl, indexToInsertAt); } - if (lastChild.getLocalName().equals(GROUP_EL)){ - return lastChild; + + + private void applySubstituentWordRule(List wordEls, int indexOfFirstWord, Element firstWord) { + Element parentEl = firstWord.getParent(); + int indexToInsertAt = parentEl.indexOf(firstWord); + Element wordRuleEl = new GroupingEl(WORDRULE_EL); + wordRuleEl.addAttribute(new Attribute(WORDRULE_ATR, WordRule.substituent.toString())); + wordRuleEl.addAttribute(new Attribute(TYPE_ATR, WordType.full.toString())); + wordRuleEl.addAttribute(new Attribute(VALUE_ATR, firstWord.getAttributeValue(VALUE_ATR))); + firstWord.detach(); + wordRuleEl.addChild(firstWord); + wordEls.set(indexOfFirstWord, wordRuleEl); + parentEl.insertChild(wordRuleEl, indexToInsertAt); } - else{ - Elements groups = ((Element)lastChild.getParent()).getChildElements(GROUP_EL); - if (groups.size()>0){ - return groups.get(groups.size()-1); + + /** + * Merges two adjacent words + * The latter word (wordToPotentiallyCombineWith) is merged into the former and removed from wordEls + * @param wordEls + * @param firstWord + * @param wordToPotentiallyCombineWith + * @throws ParsingException + */ + private void joinWords(List wordEls, Element firstWord, Element wordToPotentiallyCombineWith) throws ParsingException { + wordEls.remove(wordToPotentiallyCombineWith); + wordToPotentiallyCombineWith.detach(); + List substituentEls = firstWord.getChildElements(SUBSTITUENT_EL); + if (substituentEls.size()==0){ + throw new ParsingException("OPSIN Bug: Substituent element not found 
where substituent element expected"); + } + Element finalSubstituent = substituentEls.get(substituentEls.size() - 1); + List finalSubstituentChildren = finalSubstituent.getChildElements(); + if (!finalSubstituentChildren.get(finalSubstituentChildren.size() - 1).getName().equals(HYPHEN_EL)){//add an implicit hyphen if one is not already present + Element implicitHyphen = new TokenEl(HYPHEN_EL, "-"); + finalSubstituent.addChild(implicitHyphen); } + List elementsToMergeIntoSubstituent = wordToPotentiallyCombineWith.getChildElements(); + for (int j = elementsToMergeIntoSubstituent.size() -1 ; j >=0; j--) { + Element el = elementsToMergeIntoSubstituent.get(j); + el.detach(); + OpsinTools.insertAfter(finalSubstituent, el); + } + if (WordType.full.toString().equals(wordToPotentiallyCombineWith.getAttributeValue(TYPE_ATR))){ + firstWord.getAttribute(TYPE_ATR).setValue(WordType.full.toString()); + } + firstWord.getAttribute(VALUE_ATR).setValue(firstWord.getAttributeValue(VALUE_ATR) + wordToPotentiallyCombineWith.getAttributeValue(VALUE_ATR)); } - return null; - } - private void applySimpleWordRule(List wordEls, int indexOfFirstWord, Element firstWord) { - Element parentEl = (Element) firstWord.getParent(); - int indexToInsertAt = parentEl.indexOf(firstWord); - Element wordRuleEl = new Element(WORDRULE_ATR); - wordRuleEl.addAttribute(new Attribute(WORDRULE_ATR, WordRule.simple.toString()));//No wordRule - wordRuleEl.addAttribute(new Attribute(TYPE_ATR, WordType.full.toString())); - wordRuleEl.addAttribute(new Attribute(VALUE_ATR, firstWord.getAttributeValue(VALUE_ATR))); - firstWord.detach(); - wordRuleEl.appendChild(firstWord); - wordEls.set(indexOfFirstWord, wordRuleEl); - parentEl.insertChild(wordRuleEl, indexToInsertAt); - } - + private Element convertFunctionalGroupIntoGroup(Element word) throws ParsingException { + word.getAttribute(TYPE_ATR).setValue(WordType.full.toString()); + List functionalTerms = OpsinTools.getDescendantElementsWithTagName(word, 
FUNCTIONALTERM_EL); + if (functionalTerms.size() != 1){ + throw new ParsingException("OPSIN Bug: Exactly 1 functionalTerm expected in functionalGroupAsGroup wordRule"); + } + Element functionalTerm = functionalTerms.get(0); + functionalTerm.setName(ROOT_EL); + List functionalGroups = functionalTerm.getChildElements(FUNCTIONALGROUP_EL); + if (functionalGroups.size() != 1){ + throw new ParsingException("OPSIN Bug: Exactly 1 functionalGroup expected in functionalGroupAsGroup wordRule"); + } + Element functionalGroup = functionalGroups.get(0); + functionalGroup.setName(GROUP_EL); + functionalGroup.getAttribute(TYPE_ATR).setValue(SIMPLEGROUP_TYPE_VAL); + functionalGroup.addAttribute(new Attribute(SUBTYPE_ATR, SIMPLEGROUP_SUBTYPE_VAL)); + return functionalGroup; + } + - private void applySubstituentWordRule(List wordEls, int indexOfFirstWord, Element firstWord) { - Element parentEl = (Element) firstWord.getParent(); - int indexToInsertAt = parentEl.indexOf(firstWord); - Element wordRuleEl = new Element(WORDRULE_ATR); - wordRuleEl.addAttribute(new Attribute(WORDRULE_ATR, WordRule.substituent.toString())); - wordRuleEl.addAttribute(new Attribute(TYPE_ATR, WordType.full.toString())); - wordRuleEl.addAttribute(new Attribute(VALUE_ATR, firstWord.getAttributeValue(VALUE_ATR))); - firstWord.detach(); - wordRuleEl.appendChild(firstWord); - wordEls.set(indexOfFirstWord, wordRuleEl); - parentEl.insertChild(wordRuleEl, indexToInsertAt); - } + /** + * Sets the SMILES of the oxide group to be something like [O-2] + * ... 
unless the oxide group is multiplied and the elementaryAtom has no oxidation states greater 2 + * in which case [O-][O-] would be assumed + * @param oxideGroup + * @param elementaryAtom + */ + private void setOxideStructureAppropriately(Element oxideGroup, Element elementaryAtom) { + boolean chainInterpretation = false; + Integer multiplierVal = null; + Element possibleMultiplier = OpsinTools.getPreviousSibling(oxideGroup); + if (possibleMultiplier != null && + possibleMultiplier.getName().equals(MULTIPLIER_EL)){ + multiplierVal = Integer.parseInt(possibleMultiplier.getAttributeValue(VALUE_ATR)); + if (multiplierVal > 1) { + String commonOxidationStatesAndMax = elementaryAtom.getAttributeValue(COMMONOXIDATIONSTATESANDMAX_ATR); + if (commonOxidationStatesAndMax == null || + Integer.parseInt(commonOxidationStatesAndMax.split(":")[1]) <= 2){ + chainInterpretation = true; + } + } + } - /** - * Takes the list of wordEls, the indice of a word element, that element and the word element following it - * Merges the latter word element into the former element - * @param wordEls - * @param indexOfFirstWord - * @param firstWord - * @param wordToPotentiallyCombineWith - * @throws ParsingException - */ - private void joinWords(List wordEls, int indexOfFirstWord, Element firstWord, Element wordToPotentiallyCombineWith) throws ParsingException { - wordEls.remove(indexOfFirstWord +1); - wordToPotentiallyCombineWith.detach(); - Elements substituentEls = firstWord.getChildElements(SUBSTITUENT_EL); - if (substituentEls.size()==0){ - throw new ParsingException("OPSIN Bug: Substituent element not found where substituent element expected"); - } - Element finalSubstituent = substituentEls.get(substituentEls.size()-1); - Elements finalSubstituentChildren = finalSubstituent.getChildElements(); - if (!finalSubstituentChildren.get(finalSubstituentChildren.size()-1).getLocalName().equals(HYPHEN_EL)){//add an implicit hyphen if one is not already present - Element implicitHyphen = new 
Element(HYPHEN_EL); - implicitHyphen.appendChild("-"); - finalSubstituent.appendChild(implicitHyphen); - } - Elements elementsToMergeIntoSubstituent = wordToPotentiallyCombineWith.getChildElements(); - for (int j = elementsToMergeIntoSubstituent.size() -1 ; j >=0; j--) { - Element el = elementsToMergeIntoSubstituent.get(j); - el.detach(); - XOMTools.insertAfter(finalSubstituent, el); - } - if (WordType.full.toString().equals(wordToPotentiallyCombineWith.getAttributeValue(TYPE_ATR))){ - firstWord.getAttribute(TYPE_ATR).setValue(WordType.full.toString()); + Attribute value = oxideGroup.getAttribute(VALUE_ATR); + String smiles = value.getValue(); + String element; + if (smiles.equals("O")){ + element = "O"; + } + else if (smiles.equals("S")){ + element = "S"; + } + else if (smiles.startsWith("[Se")){ + element = "Se"; + } + else if (smiles.startsWith("[Te")){ + element = "Te"; + } + else{ + throw new RuntimeException("OPSIN Bug: Unexpected smiles for oxideGroup: " + smiles); + } + if (chainInterpretation){ + StringBuilder sb = new StringBuilder(); + sb.append('['); + sb.append(element); + sb.append("-]"); + for (int i = 2; i < multiplierVal; i++) { + sb.append('['); + sb.append(element); + sb.append(']'); + } + sb.append('['); + sb.append(element); + sb.append("-]"); + value.setValue(sb.toString()); + possibleMultiplier.detach(); + } + else{ + value.setValue("[" + element + "-2]"); + } } - firstWord.getAttribute(VALUE_ATR).setValue(firstWord.getAttributeValue(VALUE_ATR) + wordToPotentiallyCombineWith.getAttributeValue(VALUE_ATR)); - } - - private void convertFunctionalGroupIntoGroup(Element word) throws ParsingException { - word.getAttribute(TYPE_ATR).setValue(WordType.full.toString()); - List functionalTerms = XOMTools.getDescendantElementsWithTagName(word, FUNCTIONALTERM_EL); - if (functionalTerms.size()!=1){ - throw new ParsingException("OPSIN Bug: Exactly 1 functionalTerm expected in functionalGroupAsGroup wordRule"); - } - 
functionalTerms.get(0).setLocalName(ROOT_EL); - Elements functionalGroups = functionalTerms.get(0).getChildElements(FUNCTIONALGROUP_EL); - if (functionalGroups.size()!=1){ - throw new ParsingException("OPSIN Bug: Exactly 1 functionalGroup expected in functionalGroupAsGroup wordRule"); - } - functionalGroups.get(0).setLocalName(GROUP_EL); - functionalGroups.get(0).getAttribute(TYPE_ATR).setValue(SIMPLEGROUP_TYPE_VAL); - functionalGroups.get(0).addAttribute(new Attribute(SUBTYPE_ATR, SIMPLEGROUP_SUBTYPE_VAL)); - } + - - /** - * Checks whether the bond that will be formed will be ionic by inspection of the SMILES - * @param elementaryAtomEl - * @param functionalWord - * @return - * @throws ParsingException - */ - private boolean bondWillBeIonic(Element elementaryAtomEl, Element functionalWord) throws ParsingException { - String element1 = elementaryAtomEl.getAttributeValue(VALUE_ATR); - if (element1.startsWith("[")){ - element1 = element1.substring(1, element1.length()-1); - } - List functionalGroups = XOMTools.getDescendantElementsWithTagName(functionalWord, FUNCTIONALGROUP_EL); - if (functionalGroups.size()!=1){ - throw new ParsingException("OPSIN bug: Unable to find functional group in oxide or addition compound rule"); - } - String smiles = functionalGroups.get(0).getAttributeValue(VALUE_ATR); - String element2 =""; - for (int i = 0; i = 'A' && ch <='Z') || (ch >= 'a' && ch <='z')) { + if (i + 1 < len) { + char ch2 = elementStr.charAt(i + 1); + if ((ch2 >= 'A' && ch2 <='Z') || (ch2 >= 'a' && ch2 <='z')) { + //two letter element + elementStr = elementStr.substring(i, i + 2); + break; + } + } + //one letter element + elementStr = elementStr.substring(i, i + 1); + break; + } } - break; } + return ChemEl.valueOf(elementStr); + } + + private ChemEl getChemElFromWordWithFunctionalGroup(Element functionalWord) throws ParsingException { + List functionalGroups = OpsinTools.getDescendantElementsWithTagName(functionalWord, FUNCTIONALGROUP_EL); + if (functionalGroups.size() 
!= 1){ + throw new ParsingException("OPSIN bug: Unable to find functional group in oxide or addition compound rule"); + } + String smiles = functionalGroups.get(0).getAttributeValue(VALUE_ATR); + String elementStr = ""; + for (int i = 0; i < smiles.length(); i++) { + if (Character.isUpperCase(smiles.charAt(i))){ + elementStr += smiles.charAt(i); + if (i + 1 wordRules = XOMTools.getDescendantElementsWithTagName(parse, WORDRULE_EL); + List wordRules = OpsinTools.getDescendantElementsWithTagName(parse, WORDRULE_EL); for (Element wordRule : wordRules) { WordRule wordRuleVal = WordRule.valueOf(wordRule.getAttributeValue(WORDRULE_ATR)); if (wordRuleVal == WordRule.divalentFunctionalGroup){ @@ -39,71 +36,79 @@ /** * Corrects cases like "methylethyl ether" to "methyl ethyl ether" - * @param wordRule + * @param divalentFunctionalGroupWordRule */ private void checkAndCorrectOmittedSpacesInDivalentFunctionalGroupRule(Element divalentFunctionalGroupWordRule) { - List substituentWords = XOMTools.getChildElementsWithTagNameAndAttribute(divalentFunctionalGroupWordRule, WORD_EL, TYPE_ATR, SUBSTITUENT_TYPE_VAL); - if (substituentWords.size()==1){//potentially has been "wrongly" interpreted e.g. ethylmethyl ketone is more likely to mean ethyl methyl ketone - Elements children =substituentWords.get(0).getChildElements(); - if (children.size()==2){ - Element firstSubstituent =(Element)children.get(0); + List substituentWords = OpsinTools.getChildElementsWithTagNameAndAttribute(divalentFunctionalGroupWordRule, WORD_EL, TYPE_ATR, SUBSTITUENT_TYPE_VAL); + if (substituentWords.size() == 1){//potentially has been "wrongly" interpreted e.g. ethylmethyl ketone is more likely to mean ethyl methyl ketone + List children = OpsinTools.getChildElementsWithTagNames(substituentWords.get(0), new String[]{SUBSTITUENT_EL, BRACKET_EL}); + if (children.size() == 2) { + Element firstSubOrbracket = children.get(0); //rule out correct usage e.g. diethyl ether and locanted substituents e.g. 
2-methylpropyl ether - if (firstSubstituent.getAttribute(LOCANT_ATR)==null && firstSubstituent.getAttribute(MULTIPLIER_ATR)==null){ - Element subToMove =children.get(1); - subToMove.detach(); - Element newWord =new Element(WORD_EL); - newWord.addAttribute(new Attribute(TYPE_ATR, SUBSTITUENT_TYPE_VAL)); - newWord.appendChild(subToMove); - XOMTools.insertAfter(substituentWords.get(0), newWord); + if (firstSubOrbracket.getAttribute(LOCANT_ATR) == null && firstSubOrbracket.getAttribute(MULTIPLIER_ATR) == null) { + Element firstGroup = findRightMostGroupInSubBracketOrRoot(firstSubOrbracket); + Fragment firstFrag = firstGroup.getFrag(); + if (hasSingleMonoValentCarbonOrSiliconRadical(firstFrag)) { + Element subToMove =children.get(1); + subToMove.detach(); + Element newWord =new GroupingEl(WORD_EL); + newWord.addAttribute(new Attribute(TYPE_ATR, SUBSTITUENT_TYPE_VAL)); + newWord.addChild(subToMove); + OpsinTools.insertAfter(substituentWords.get(0), newWord); + } } } } } - - + /** * Corrects cases like methyl-2-ethylacetate --> methyl 2-ethylacetate * @param wordRule * @throws StructureBuildingException */ private void checkAndCorrectOmittedSpaceEster(Element wordRule) throws StructureBuildingException { - Elements words = wordRule.getChildElements(WORD_EL); - if (words.size()!=1){ + List words = wordRule.getChildElements(WORD_EL); + if (words.size() != 1) { return; } - Element word =words.get(0); + Element word = words.get(0); String wordRuleContents = wordRule.getAttributeValue(VALUE_ATR); - if (matchAteOrIteEnding.matcher(wordRuleContents).find()){ - List roots = XOMTools.getChildElementsWithTagName(word, ROOT_EL); - if (roots.size()==1){ - Element rootGroup = roots.get(0).getFirstChildElement(GROUP_EL); - if (AMINOACID_TYPE_VAL.equals(rootGroup.getAttributeValue(TYPE_ATR))){ - return;//amino acids are implicitly N locanted - } - Fragment rootFrag = state.xmlFragmentMap.get(rootGroup); + if (matchAteOrIteEnding.matcher(wordRuleContents).find()) { + List children = 
OpsinTools.getChildElementsWithTagNames(word, new String[]{SUBSTITUENT_EL, BRACKET_EL, ROOT_EL}); + if (children.size() >= 2) { + Element rootEl = children.get(children.size() - 1); + Element rootGroup = findRightMostGroupInSubBracketOrRoot(rootEl); + Fragment rootFrag = rootGroup.getFrag(); int functionalAtomsCount = rootFrag.getFunctionalAtomCount(); - if (functionalAtomsCount >0){ - List substituentsAndBrackets = XOMTools.getChildElementsWithTagNames(word, new String[]{SUBSTITUENT_EL, BRACKET_EL}); - if (substituentsAndBrackets.size()==0){ + int rootMultiplier = 1; + { + String rootElMultiplierAtrVal = rootEl.getAttributeValue(MULTIPLIER_ATR); + if (rootElMultiplierAtrVal != null) { + rootMultiplier = Integer.parseInt(rootElMultiplierAtrVal); + functionalAtomsCount *= rootMultiplier; + } + } + if (functionalAtomsCount > 0){ + List substituents = children.subList(0, children.size() - 1); + int substituentCount = substituents.size(); + if (substituentCount == 1 && rootMultiplier > 1) { return; } - Element firstChild = substituentsAndBrackets.get(0); + Element firstChild = substituents.get(0); if (!checkSuitabilityOfSubstituentForEsterFormation(firstChild, functionalAtomsCount)){ return; } - if (substituentsAndBrackets.size()>1 && (allBarFirstSubstituentHaveLocants(substituentsAndBrackets) || insufficientSubstitutableHydrogenForSubstition(substituentsAndBrackets, rootFrag))){ + String multiplierValue = firstChild.getAttributeValue(MULTIPLIER_ATR); + if (specialCaseWhereEsterPreferred(findRightMostGroupInSubBracketOrRoot(firstChild), multiplierValue, rootGroup, substituentCount)) { + transformToEster(wordRule, firstChild); + } + else if (substituentCount > 1 && + (allBarFirstSubstituentHaveLocants(substituents) || insufficientSubstitutableHydrogenForSubstitution(substituents, rootFrag, rootMultiplier))){ transformToEster(wordRule, firstChild); } - else if (substituentsAndBrackets.size()==1){ - String multiplierValue = firstChild.getAttributeValue(MULTIPLIER_ATR); - 
int multiplier = 1; - if (multiplierValue!=null){ - multiplier= Integer.parseInt(multiplierValue); - } - if (specialCaseWhereEsterPreferred(getRightMostGroup(firstChild), multiplierValue, wordRuleContents) || - substitutionWouldBeAmbiguous(rootGroup, rootFrag, multiplier)){ - transformToEster(wordRule, firstChild); - } + else if ((substituentCount == 1 || rootMultiplier > 1) && substitutionWouldBeAmbiguous(rootFrag, multiplierValue)) { + //either 1 substituent or multiplicative nomenclature (in the multiplicative nomenclature case many substituents will not have locants) + transformToEster(wordRule, firstChild); } } } @@ -122,25 +127,26 @@ return true; } - private boolean insufficientSubstitutableHydrogenForSubstition(List substituentsAndBrackets, Fragment frag) { - int substitutableHydrogens = getAtomForEachSubstitutableHydrogen(frag).size(); + private boolean insufficientSubstitutableHydrogenForSubstitution(List substituentsAndBrackets, Fragment frag, int rootMultiplier) { + int substitutableHydrogens = getAtomForEachSubstitutableHydrogen(frag).size() * rootMultiplier; for (int i = 1; i < substituentsAndBrackets.size(); i++) { Element subOrBracket = substituentsAndBrackets.get(i); - Fragment f = state.xmlFragmentMap.get(getRightMostGroup(subOrBracket)); + Fragment f = findRightMostGroupInSubBracketOrRoot(subOrBracket).getFrag(); String multiplierValue = subOrBracket.getAttributeValue(MULTIPLIER_ATR); int multiplier = 1; - if (multiplierValue!=null){ - multiplier= Integer.parseInt(multiplierValue); + if (multiplierValue != null){ + multiplier = Integer.parseInt(multiplierValue); } substitutableHydrogens -= (getTotalOutAtomValency(f) * multiplier); } - int firstFragSubstitutableHydrogenRequired = getTotalOutAtomValency(state.xmlFragmentMap.get(getRightMostGroup(substituentsAndBrackets.get(0)))); - String multiplierValue = substituentsAndBrackets.get(0).getAttributeValue(MULTIPLIER_ATR); + Element potentialEsterSub = substituentsAndBrackets.get(0); + int 
firstFragSubstitutableHydrogenRequired = getTotalOutAtomValency(findRightMostGroupInSubBracketOrRoot(potentialEsterSub).getFrag()); + String multiplierValue = potentialEsterSub.getAttributeValue(MULTIPLIER_ATR); int multiplier = 1; - if (multiplierValue!=null){ - multiplier= Integer.parseInt(multiplierValue); + if (multiplierValue != null){ + multiplier = Integer.parseInt(multiplierValue); } - if (substitutableHydrogens >=0 && (substitutableHydrogens - (firstFragSubstitutableHydrogenRequired * multiplier)) <0){ + if (substitutableHydrogens >=0 && (substitutableHydrogens - (firstFragSubstitutableHydrogenRequired * multiplier)) < 0){ return true; } return false; @@ -155,77 +161,91 @@ } /** - * Ester form preferred when mono is used and when an alkyl chain is used on formate/acetate - * e.g. ethylacetate + * Ester form preferred when: + * mono is used on substituent + * alkyl chain is used on formate/acetate e.g. ethylacetate + * Root is carbamate, >=2 substituents, and this is the only word rule + * (ester and non-ester carbamates differ only by whether or not there is a space, heuristically the ester is almost always intended under these conditions) * @param substituentGroupEl * @param multiplierValue - * @param wordRuleContents + * @param rootGroup + * @param numOfSubstituents * @return */ - private boolean specialCaseWhereEsterPreferred(Element substituentGroupEl, String multiplierValue, String wordRuleContents) { - if (multiplierValue!=null && Integer.parseInt(multiplierValue)==1){ + private boolean specialCaseWhereEsterPreferred(Element substituentGroupEl, String multiplierValue, Element rootGroup, int numOfSubstituents) { + if (multiplierValue != null && Integer.parseInt(multiplierValue) == 1){ return true; } - if (substituentGroupEl.getAttributeValue(TYPE_ATR).equals(CHAIN_TYPE_VAL) && ALKANESTEM_SUBTYPE_VAL.equals(substituentGroupEl.getAttributeValue(SUBTYPE_ATR))){ - String potentialString = "(?i)" + substituentGroupEl.getValue() + 
"yl[\\-]?(form|methan|acet|ethan)[o]?ate"; - if (wordRuleContents.matches(potentialString)){ + String rootGroupName = rootGroup.getParent().getValue(); + if (substituentGroupEl.getAttributeValue(TYPE_ATR).equals(CHAIN_TYPE_VAL) && + ALKANESTEM_SUBTYPE_VAL.equals(substituentGroupEl.getAttributeValue(SUBTYPE_ATR))) { + if (substituentGroupEl.getParent().getValue().matches(substituentGroupEl.getValue() + "yl-?") && + rootGroupName.matches(".*(form|methan|acet|ethan)[o]?ate?")) { + return true; + } + } + if ((rootGroupName.endsWith("carbamate") || rootGroupName.endsWith("carbamat")) && numOfSubstituents >= 2) { + Element temp = substituentGroupEl.getParent(); + while (temp.getParent() != null) { + temp = temp.getParent(); + } + if (temp.getChildElements(WORDRULE_EL).size() == 1) { return true; } } return false; } - private boolean substitutionWouldBeAmbiguous(Element rootGroup, Fragment frag, int multiplier) { - if (multiplier ==1 && (rootGroup.getAttribute(DEFAULTINID_ATR)!=null || rootGroup.getAttribute(DEFAULTINLOCANT_ATR)!=null)){ + private boolean substitutionWouldBeAmbiguous(Fragment frag, String multiplierValue) { + int multiplier = 1; + if (multiplierValue != null){ + multiplier = Integer.parseInt(multiplierValue); + } + if (multiplier == 1 && frag.getDefaultInAtom() != null) { return false; } List atomForEachSubstitutableHydrogen = getAtomForEachSubstitutableHydrogen(frag); - StereoAnalyser analyzer = new StereoAnalyser(frag); - Set uniqueEnvironments = new HashSet(); - for (Atom a : atomForEachSubstitutableHydrogen) { - uniqueEnvironments.add(analyzer.getAtomEnvironmentNumber(a)); - } - if (atomForEachSubstitutableHydrogen.size()==multiplier){ + if (atomForEachSubstitutableHydrogen.size() == multiplier){ return false; } - if (uniqueEnvironments.size()==1 && (multiplier==1 || multiplier == atomForEachSubstitutableHydrogen.size()-1)){ + StereoAnalyser analyser = new StereoAnalyser(frag); + Set uniqueEnvironments = new HashSet(); + for (Atom a : 
atomForEachSubstitutableHydrogen) { + uniqueEnvironments.add(AmbiguityChecker.getAtomEnviron(analyser, a)); + } + if (uniqueEnvironments.size() == 1 && (multiplier == 1 || multiplier == atomForEachSubstitutableHydrogen.size() - 1)){ return false; } return true; } private boolean checkSuitabilityOfSubstituentForEsterFormation(Element subOrBracket, int rootFunctionalAtomsCount) { - if (subOrBracket.getAttribute(LOCANT_ATR)!=null){ + if (subOrBracket.getAttribute(LOCANT_ATR) != null){ return false; } - Fragment rightMostGroup = state.xmlFragmentMap.get(getRightMostGroup(subOrBracket)); - if (rightMostGroup.getOutAtomCount() != 1 || rightMostGroup.getOutAtom(0).getValency()!=1){ + Fragment rightMostGroup = findRightMostGroupInSubBracketOrRoot(subOrBracket).getFrag(); + if (!hasSingleMonoValentCarbonOrSiliconRadical(rightMostGroup)) { return false; } String multiplierStr = subOrBracket.getAttributeValue(MULTIPLIER_ATR); - if (multiplierStr!=null){ + if (multiplierStr != null) { int multiplier = Integer.parseInt(multiplierStr); - if (multiplier > rootFunctionalAtomsCount){ + if (multiplier > rootFunctionalAtomsCount) { return false; } } return true; } - - /** - * Returns the right most group - * @param subOrBracket - * @return - */ - private Element getRightMostGroup (Element subOrBracket) { - Element group; - if (subOrBracket.getLocalName().equals(BRACKET_EL)){ - group = StructureBuildingMethods.findRightMostGroupInBracket(subOrBracket); - } - else{ - group = subOrBracket.getFirstChildElement(GROUP_EL); + + private boolean hasSingleMonoValentCarbonOrSiliconRadical(Fragment frag) { + if (frag.getOutAtomCount() == 1) { + OutAtom outAtom = frag.getOutAtom(0); + if (outAtom.getValency() == 1 && + (outAtom.getAtom().getElement() == ChemEl.C || outAtom.getAtom().getElement() == ChemEl.Si)) { + return true; + } } - return group; + return false; } private List getAtomForEachSubstitutableHydrogen(Fragment frag) { @@ -246,15 +266,15 @@ private void transformToEster(Element 
parentSimpleWordRule, Element substituentOrBracket) throws StructureBuildingException { parentSimpleWordRule.getAttribute(WORDRULE_ATR).setValue(WordRule.ester.toString()); - Elements childElsOfSub = substituentOrBracket.getChildElements(); + List childElsOfSub = substituentOrBracket.getChildElements(); Element lastChildElOfSub =childElsOfSub.get(childElsOfSub.size()-1); - if (lastChildElOfSub.getLocalName().equals(HYPHEN_EL)){ + if (lastChildElOfSub.getName().equals(HYPHEN_EL)){ lastChildElOfSub.detach(); } substituentOrBracket.detach(); - Element newSubstituentWord = new Element(WORD_EL); + Element newSubstituentWord = new GroupingEl(WORD_EL); newSubstituentWord.addAttribute(new Attribute(TYPE_ATR, SUBSTITUENT_TYPE_VAL)); - newSubstituentWord.appendChild(substituentOrBracket); + newSubstituentWord.addChild(substituentOrBracket); parentSimpleWordRule.insertChild(newSubstituentWord, 0); String multiplierStr = substituentOrBracket.getAttributeValue(MULTIPLIER_ATR); if (multiplierStr!=null){ @@ -262,7 +282,7 @@ int multiplier = Integer.parseInt(multiplierStr); for (int i = 1; i < multiplier; i++) { Element clone = state.fragManager.cloneElement(state, newSubstituentWord); - XOMTools.insertAfter(newSubstituentWord, clone); + OpsinTools.insertAfter(newSubstituentWord, clone); } } } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/WordTools.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/WordTools.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/WordTools.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/WordTools.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,7 +1,6 @@ package uk.ac.cam.ch.wwmm.opsin; import java.util.ArrayList; -import java.util.LinkedList; import java.util.List; import static uk.ac.cam.ch.wwmm.opsin.OpsinTools.*; @@ -18,9 +17,8 @@ * @param parseTokensList * @param chemicalName * @return - * @throws ParsingException */ - static List 
splitIntoParseWords(List parseTokensList, String chemicalName) throws ParsingException { + static List splitIntoParseWords(List parseTokensList, String chemicalName) { List wellFormedParseTokens = new ArrayList();//these are all in the same word as would be expected List> splitParseTokensForEachParseTokens = new ArrayList>(); /* @@ -115,11 +113,12 @@ * @return A List of lists of annotations, each list corresponds to a substituent/maingroup/functionalTerm */ static List> chunkAnnotations(List annots) { - LinkedList> chunkList = new LinkedList>(); + List> chunkList = new ArrayList>(); List currentTerm = new ArrayList(); for (Character annot : annots) { currentTerm.add(annot); - if (annot.equals(END_OF_SUBSTITUENT) || annot.equals(END_OF_MAINGROUP) || annot.equals(END_OF_FUNCTIONALTERM)) { + char ch = annot; + if (ch == END_OF_SUBSTITUENT || ch == END_OF_MAINGROUP || ch == END_OF_FUNCTIONALTERM) { chunkList.add(currentTerm); currentTerm = new ArrayList(); } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XmlDeclarations.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XmlDeclarations.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XmlDeclarations.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XmlDeclarations.java 2017-07-23 20:55:18.000000000 +0000 @@ -6,21 +6,21 @@ * */ class XmlDeclarations { - + //TODO are all these types and subtypes actually a good idea considering the vast majority are never used? - + /* - * The container XML elements. These are generated by OPSIN + * The container XML elements. These are generated by OPSIN */ /**Define a scope for determining what group a substituent should bond to*/ static final String BRACKET_EL ="bracket"; /**Contains a functional group or class. 
These terms typically effect the chosen wordRule for the name*/ static final String FUNCTIONALTERM_EL ="functionalTerm"; - + /**The top most element in OPSIN's parse tree. As a name can describe multiple molecules the same is confusingly true of this element*/ static final String MOLECULE_EL ="molecule"; - + /**Contains a substituent. A substituent will after the ComponentProcessor contain one group*/ static final String SUBSTITUENT_EL = "substituent"; @@ -29,62 +29,65 @@ /**Contains brackets/substituents/root. Generally these correspond to words in the original chemical name (unless erroneous/omitted spaces were present)*/ static final String WORD_EL ="word"; - + /**Contains words/wordRules. The value of the wordRule indicates how the StructureBuilder should process its children*/ static final String WORDRULE_EL ="wordRule"; - + /* * The token XML elements. These are generally produced by the parser from the tokenised chemical name * Some are produced by OPSIN in the ComponentGenerator/ComponentProcessor */ - + + /**Adds a hydrogen to an unsaturated system, this is hydrogen that is added due to a suffix and is expressed in a locant e.g. 1(2H) */ + static final String ADDEDHYDROGEN_EL ="addedHydrogen"; + /**A component of an alkaneStem e.g. [octa][hexaconta][tetract]ane will have three alkaneStemComponents*/ static final String ALKANESTEMCOMPONENT ="alkaneStemComponent"; - + /**Something like tert/iso/sec Modifies an alkaneStem in the ComponentGenerator*/ static final String ALKANESTEMMODIFIER_EL ="alkaneStemModifier"; /**An annulene. Converted to a group by the ComponentGenerator*/ static final String ANNULEN_EL ="annulen"; - + /**A bridge described in SMILES for used on rings*/ static final String FUSEDRINGBRIDGE_EL ="fusedRingBridge"; - + /**An O that indicates that the preceding alkaneStem is in fact a bridge*/ static final String BRIDGEFORMINGO_EL ="bridgeFormingO"; /**A locant indicating the positions for a glycosidic linkage. 
The first locant will point to an alpha carbon * Also used to indicate joining of nucleosyl groups*/ - static final String BIOCHEMICALLINKAGE_EL ="biochemicalLinkage"; + static final String BIOCHEMICALLINKAGE_EL ="biochemicalLinkage"; /**Indicates the size of the ring in a carbohydrate e.g. furanose = 5*/ static final String CARBOHYDRATERINGSIZE_EL ="carbohydrateRingSize"; /**A charge specifier e.g. (2+). Value is the charge to set something to*/ static final String CHARGESPECIFIER_EL ="chargeSpecifier"; - + /**Created by the ComponentProcessor. Something like the acetic acid in benzene-1,3,5-triacetic acid*/ static final String CONJUNCTIVESUFFIXGROUP_EL ="conjunctiveSuffixGroup"; - + /**Used by the ComponentGenerator to group elements into bracket elements*/ static final String CLOSEBRACKET_EL ="closebracket"; - + /**Used by the ComponentGenerator to modify alkanes into cycloalkanes*/ static final String CYCLO_EL ="cyclo"; - + /** A delta used to indicate the position of a double bond in older nomenclature*/ static final String DELTA_EL ="delta"; - - /** Used in amino acid and carbohydrate nomenclature to indicate stereochemistry*/ - static final String DLSTEREOCHEMISTRY_EL ="dlStereochemistry"; + + /**A fractional multiplier e.g. hemi*/ + static final String FRACTIONALMULTIPLIER_EL ="fractionalMultiplier"; /**A functional Class such as acid. Does not correspond to a fragment*/ static final String FUNCTIONALCLASS_EL ="functionalClass"; - + /**A functional group such as alcohol or sulfone. Describes a fragment*/ static final String FUNCTIONALGROUP_EL ="functionalGroup"; - + /**Currently just poly or oligo for polymers*/ static final String FUNCTIONALMODIFIER_EL ="functionalModifier"; @@ -93,37 +96,37 @@ /**Define a scope for determining what group a substituent should bond to*/ static final String GROUP_EL ="group"; - + /**A heteroatom. 
Could be part of a Hantzsch Widman ring or a replacement prefix*/ static final String HETEROATOM_EL ="heteroatom"; /**Adds a hydrogen to an unsaturated system (hydro/perhydro)*/ static final String HYDRO_EL ="hydro"; - + /**One of the systematic hydrocarbon fused ring series e.g. tetralene, pentalene. Converted to a group by the ComponentGenerator*/ static final String HYDROCARBONFUSEDRINGSYSTEM_EL ="hydrocarbonFusedRingSystem"; /**Adds a hydrogen to an unsaturated system to indicate what atoms are saturated in a system where not all atoms with spare valency can form double bonds e.g. e.g. 2H-pyran*/ static final String INDICATEDHYDROGEN_EL ="indicatedHydrogen"; - - /**Adds a hydrogen to an unsaturated system, this is hydrogen that is added due to a suffix and is expressed in a locant e.g. 1(2H) */ - static final String ADDEDHYDROGEN_EL ="addedHydrogen"; - + + /**Specifies that one of more atoms are enriched with a particular isotope*/ + static final String ISOTOPESPECIFICATION_EL ="isotopeSpecification"; + /**A hyphen between two substituents. Used as hint that the two substituents do not join together*/ static final String HYPHEN_EL ="hyphen"; - + /**ine as in the end of an aminoAcid. Has no meaning*/ static final String INE_EL ="ine"; - + /**An infix. This performs functionalReplacement on a suffix*/ static final String INFIX_EL ="infix"; /**Indicates that a heteroatom or atom should be in a specific valency*/ static final String LAMBDACONVENTION_EL ="lambdaConvention"; - + /**A locant e.g. 
where a substituent should attach*/ static final String LOCANT_EL ="locant"; - + /**Used by the ComponentGenerator to group elements into bracket elements*/ static final String OPENBRACKET_EL ="openbracket"; @@ -132,13 +135,13 @@ /**Describes the number of spiro centres in a poly cyclic spiro system*/ static final String POLYCYCLICSPIRO_EL ="polyCyclicSpiro"; - + /**A locant indicating through which atoms a multiplied parent in multiplicative nomenclature is connected*/ static final String MULTIPLICATIVELOCANT_EL ="multiplicativeLocant"; - + /**A multiplier e.g. indicating multiplication of a heteroatom or substituent*/ static final String MULTIPLIER_EL ="multiplier"; - + /**e.g. (III), Specifies the oxidation number of an atom. Value is the oxidation number to set something to*/ static final String OXIDATIONNUMBERSPECIFIER_EL ="oxidationNumberSpecifier"; @@ -147,13 +150,13 @@ /**Used to indicate how many rings are in a ring assembly*/ static final String RINGASSEMBLYMULTIPLIER_EL ="ringAssemblyMultiplier"; - + /**A spiro system. Converted to a group by the ComponentGenerator*/ static final String SPIRO_EL ="spiro"; - + /**A locant that seperates components of a spiro system*/ static final String SPIROLOCANT_EL ="spiroLocant"; - + /**Something like R/S/E/Z. Indicates stereochemical configuration*/ static final String STEREOCHEMISTRY_EL ="stereoChemistry"; @@ -162,19 +165,19 @@ /**Present in complicated nomenclature to avoid ambiguity*/ static final String STRUCTURALOPENBRACKET_EL ="structuralOpenBracket"; - + /**Indicates replacement of a group by hydrogen e.g. deoxy means replace OH with H*/ static final String SUBTRACTIVEPREFIX_EL ="subtractivePrefix"; - + /**A suffix e.g. 
amide, al, yl etc.*/ static final String SUFFIX_EL ="suffix"; - + /**Something like sulfon/carbo/carbox that modifies a following suffix*/ static final String SUFFIXPREFIX_EL ="suffixPrefix"; - + /**ene/yne, indicated that a double/triple bond should be formed at a saturated location*/ static final String UNSATURATOR_EL ="unsaturator"; - + /**A vonBaeyer system. Converted to a group by the ComponentGenerator*/ static final String VONBAEYER_EL ="vonBaeyer"; @@ -183,98 +186,128 @@ * Some are produced by OPSIN in the ComponentGenerator/ComponentProcessor */ + /**The semantic meaning of the token. Exact meaning is dependent on the element type e.g. SMILES for a group but a number for a multiplier*/ static final String VALUE_ATR ="value"; - static final String VALTYPE_ATR = "valType"; - static final String LABELS_ATR = "labels"; - static final String FUSEDRINGNUMBERING_ATR = "fusedRingNumbering"; - static final String DEFAULTINLOCANT_ATR = "defaultInLocant"; - static final String DEFAULTINID_ATR = "defaultInID"; - static final String OUTIDS_ATR = "outIDs"; - - static final String ALPHABETACLOCKWISEATOMORDERING_ATR="alphaBetaClockWiseAtomOrdering"; + + /**The type of the token. Possible values are enumerated with strings ending in _TYPE_VAL */ + static final String TYPE_ATR = "type"; + + /**The subType of the token. Possible values are enumerated with strings ending in _SUBTYPE_VAL */ + static final String SUBTYPE_ATR = "subType"; + + /**Whether the group can be additively bonded to. e.g. thio */ static final String ACCEPTSADDITIVEBONDS_ATR = "acceptsAdditiveBonds"; + + /**Used to add a higher order bond at a position that can be subsequently specified. 
+ * Syntax: semicolon delimited list of the format: orderOfBond space ("id"|"locant"|"defaultId"|"defaultLocant") space (id|locant) */ + static final String ADDBOND_ATR = "addBond"; + + /**Used to add a group at a position that can be subsequently specified + * Syntax: semicolon delimited list of the format: SMILESofGroupToBeAdded space ("id"|"locant"|"defaultId"|"defaultLocant") space (id|locant) [space locantLabel]. */ + static final String ADDGROUP_ATR = "addGroup"; + + /**Used to set a heteroatom at a position that can be subsequently specified + * Syntax: semicolon delimited list of the format: elementOfAtom space ("id"|"locant"|"defaultId"|"defaultLocant") space (id|locant). */ + static final String ADDHETEROATOM_ATR = "addHeteroAtom"; + + /**Another value that the token takes. e.g. for suffix tokens that add two suffixes to the molecule*/ + static final String ADDITIONALVALUE_ATR = "additionalValue"; + + /**Listed in a clockwise order, the locants of the atoms that define a pseudo 2D plane for alpha/beta stereochemistry */ + static final String ALPHABETACLOCKWISEATOMORDERING_ATR="alphaBetaClockWiseAtomOrdering"; + + /**For elements, the typical oxidation states (comma separated) then a colon and the maximum oxidation station*/ + static final String COMMONOXIDATIONSTATESANDMAX_ATR = "commonOxidationStatesAndMax"; + + /**The ID of the atom which by default an incoming fragment should connect to. ID is relative to this particular fragment (first atom =1) */ + static final String DEFAULTINID_ATR = "defaultInID"; + + /**The locant of the atom which by default an incoming fragment should connect to**/ + static final String DEFAULTINLOCANT_ATR = "defaultInLocant"; + + /**Works like the locant attribute but refers to the atoms OPSIN ID. 
Will be overridden by the locant/locantId attribute*/ + static final String DEFAULTLOCANTID_ATR = "defaultLocantID"; + + /**A comma separated list of locants that are expected in front of a group for either xylene-like nomenclature or as indirect locants*/ + static final String FRONTLOCANTSEXPECTED_ATR = "frontLocantsExpected"; + + /**A comma separated list of relative IDs at which to add functionalAtoms*/ + static final String FUNCTIONALIDS_ATR = "functionalIDs"; + + /**Numbering to use when ring is part of a fused ring system */ + static final String FUSEDRINGNUMBERING_ATR = "fusedRingNumbering"; - /**Works like a locant but refers to the atoms OPSIN id. Will be overridden by the locant/locantId attribute*/ - static final String DEFAULTLOCANTID_ATR = "defaultLocantID"; + /**Semi-colon delimited list of labels for * atoms, where the * atoms represent generic groups e.g. Alkyl*/ + static final String HOMOLOGY_ATR = "homology"; + /**Indicates that the substituent can either be -X- or X= depending on context cf. imino or methylene*/ static final String IMINOLIKE_ATR = "iminoLike"; - + /**The functional replacement specified by an infix to be performed on the suffix*/ static final String INFIX_ATR = "infix"; - /**Indicates that an element has been multiplied. Prevents badly assigning indirect locants*/ - static final String MULTIPLIED_ATR = "multiplied"; + /**Defines the locants for which a radical will connect to another group in multiplicative nomenclature e.g. in 2,2'-methylenedipyridine the 2,2' become inlocants of the pyridine*/ + static final String INLOCANTS_ATR = "inLocants"; - /**A comma separated list of relative ids at which to add functionalAtoms*/ - static final String FUNCTIONALIDS_ATR = "functionalIDs"; - static final String ADDGROUP_ATR = "addGroup"; - static final String ADDHETEROATOM_ATR = "addHeteroAtom"; - static final String ADDBOND_ATR = "addBond"; - - /**Can the substituent be implicitly bracketted to a previous substitutent e.g. 
methylaminobenzene --> (methylamino)benzene as amino has this atr*/ - static final String USABLEASJOINER_ATR = "usableAsAJoiner"; - - /**A comma separated list of locants that are expected in front of a group for either xylene-like nomenclature or as indirect locants*/ - static final String FRONTLOCANTSEXPECTED_ATR = "frontLocantsExpected"; - - /** Used as a fudge for some hydrogen esters e.g. dihydrogenphosphate*/ - static final String NUMBEROFFUNCTIONALATOMSTOREMOVE_ATR = "numberOfFunctionalAtomsToRemove"; - - /**A comma seperated list of relatives ids indicating where to add suffix/es*/ - static final String SUFFIXAPPLIESTO_ATR = "suffixAppliesTo"; - - /**A relatives id indicating at what position to attach a suffix to by default*/ - static final String SUFFIXAPPLIESTOBYDEFAULT_ATR = "suffixAppliesToByDefault"; - static final String COMMONOXIDATIONSTATESANDMAX_ATR = "commonOxidationStatesAndMax"; - static final String ADDITIONALVALUE_ATR = "additionalValue"; - static final String LOCANT_ATR = "locant"; - - /**Works like a locant but refers to the atoms OPSIN id*/ - static final String LOCANTID_ATR = "locantID"; - - - static final String TYPE_ATR = "type"; - static final String SUBTYPE_ATR = "subType"; + /**Determined by the {@link ComponentProcessor}. True if a fragment has more than two radical positions e.g. ethan-1,2-diyl not ethanylidene*/ + static final String ISAMULTIRADICAL_ATR = "isAMultiRadical"; + + /**Was the word salt encountered indicating that a salt was expected? */ + static final String ISSALT_ATR = "isSalt"; + + /**Slash delimited list of locants. List must be the same length as number of atoms. Multiple locants can be given to an atom by comma delimiting them*/ + static final String LABELS_ATR = "labels"; - /**Defines the locants for which a radical will connect to another group in multiplicative nomenclature e.g. 
in 2,2'-methylenedipyridine the 2,2' become inlocants of the pyridine*/ - static final String INLOCANTS_ATR = "inLocants"; - - /**Determined by the prsStructreBuilder. True if a fragment has more than two radical positions e.g. ethan-1,2-diyl not ethanylidene*/ - static final String ISAMULTIRADICAL_ATR = "isAMultiRadical"; - /**Added to a heteroatom or LAMBDACONVENTION_EL to indicate the desired valency*/ - static final String LAMBDA_ATR = "lambda"; - + static final String LAMBDA_ATR = "lambda"; + + /**Locant used when deciding where to apply an operation*/ + static final String LOCANT_ATR = "locant"; + + /**Works like a locant but refers to the atom's OPSIN id*/ + static final String LOCANTID_ATR = "locantID"; + + /**Indicates that this trivial name has the opposite D/L stereochemistry to others in its class i.e. L- for carbohydrates or D- for amino acids*/ + static final String NATURALENTISOPPOSITE_ATR ="naturalEntIsOpposite"; + + /** Used as a fudge for some hydrogen esters e.g. dihydrogenphosphate*/ + static final String NUMBEROFFUNCTIONALATOMSTOREMOVE_ATR = "numberOfFunctionalAtomsToRemove"; + + /**Indicates that an element has been multiplied. Prevents badly assigning indirect locants*/ + static final String MULTIPLIED_ATR = "multiplied"; + /**Indicates how many times a bracket/substituent should be multiplied*/ static final String MULTIPLIER_ATR ="multiplier"; - + /** The name that was inputted into OPSIN's parser. Attribute of molecule */ - static final String NAME_ATR = "name"; - + static final String NAME_ATR = "name"; + + /**A comma separated list of relative IDs at which to add OutAtoms*/ + static final String OUTIDS_ATR = "outIDs"; + /**Indicates that a substituent/bracket has been processed by StructureBuildingMethods*/ static final String RESOLVED_ATR ="resolved"; - - /**Indicates that the natural enantiomer is the opposite that is expected for the class of compound e.g. 
a natural L sugar*/ - static final String NATURALENTISOPPOSITE_ATR ="naturalEntIsOpposite"; - + /**Placed on a word rule if explicit stoichiometry has been provided. Value is always an integer */ - static final String STOICHIOMETRY_ATR = "stoichiometry"; - + static final String STOICHIOMETRY_ATR = "stoichiometry"; + /** Holds the value of any tokens for which XML was not generated by the parser e.g. an optional e. Multiple elided tokens will be concatenated*/ static final String SUBSEQUENTUNSEMANTICTOKEN_ATR ="subsequentUnsemanticToken"; - + + /**A comma separated list of relatives IDs indicating where to add suffix/es*/ + static final String SUFFIXAPPLIESTO_ATR = "suffixAppliesTo"; + + /**A relatives ID indicating at what position to attach a suffix to by default*/ + static final String SUFFIXAPPLIESTOBYDEFAULT_ATR = "suffixAppliesToByDefault"; + /**Added by the ComponentGenerator to a suffix*/ - static final String SUFFIXPREFIX_ATR = "suffixPrefix"; - + static final String SUFFIXPREFIX_ATR = "suffixPrefix"; + + /**Can the substituent be implicitly bracketed to a previous substitutent e.g. methylaminobenzene --> (methylamino)benzene as amino has this attribute*/ + static final String USABLEASJOINER_ATR = "usableAsAJoiner"; + /**The wordRule that a wordRule element corresponds to*/ static final String WORDRULE_ATR ="wordRule"; - - /* - * The values the valType attribute can take - */ - - /**A SMILES string is the value attribute. The SMILES parser of OPSIN is not entirely complete and additional supports a few things that are not supported by standard SMILES*/ - static final String SMILES_VALTYPE_VAL = "SMILES"; /* * The values the type attribute can take @@ -282,172 +315,218 @@ */ /**A term like amide or hydrazide that replaces a functional hydroxy group*/ static final String ACIDREPLACINGFUNCTIONALGROUP_TYPE_VAL ="acidReplacingFunctionalGroup"; - + /**A trivial carboxylic acid. 
These by default do not have their acid groups which are then added on using suffixes*/ static final String ACIDSTEM_TYPE_VAL ="acidStem"; - - /**This stereochemistry element conveys alpha/beta stereochemisty*/ + + /**This stereochemistry element conveys alpha/beta stereochemistry*/ static final String ALPHA_OR_BETA_TYPE_VAL ="alphaOrBeta"; - + /**An aminoAcid. These by default do not have their acid groups which are then added on using suffixes. Notably these suffixes do NOT correspond to tokens in the input chemical name!*/ static final String AMINOACID_TYPE_VAL ="aminoAcid"; - + /**A subtractive prefix that removes a terminal chalcogen and forms an intramolecular bridge to another*/ static final String ANHYDRO_TYPE_VAL ="anhydro"; - + + /**This stereochemistry element conveys axial stereochemistry + * These indicate the postion of groups are an axis/plane/helix. This is expressed by the descriptors: M, P, Ra, Sa, Rp, Sp*/ + static final String AXIAL_TYPE_VAL ="axial"; + /**A normal multiplier e.g. di*/ static final String BASIC_TYPE_VAL ="basic"; - + + /**An isotopeSpecification using boughton system nomenclature*/ + static final String BOUGHTONSYSTEM_TYPE_VAL ="boughtonSystem"; + /**A locant enclosed in square brackets e.g. [5]*/ static final String BRACKETEDLOCANT_TYPE_VAL ="bracketedLocant"; - + /**This stereochemistry element specifies stereochemistry in a carbohydrate e.g. 
gluco is r/l/r/r (position of hydroxy in a fischer projection)*/ static final String CARBOHYDRATECONFIGURATIONPREFIX_TYPE_VAL ="carbohydrateConfigurationalPrefix"; - + /**Groups formed in accordance with carbohydrate nomenclature */ static final String CARBOHYDRATE_TYPE_VAL ="carbohydrate"; - + /**Indicates the group should be acyclic*/ static final String CHAIN_TYPE_VAL ="chain"; - + /**This suffix modifies charge*/ static final String CHARGE_TYPE_VAL ="charge"; - - /**This stereochemistry element conveys cis/trans stereochemisty*/ + + /**This stereochemistry element conveys cis/trans stereochemistry*/ static final String CISORTRANS_TYPE_VAL ="cisOrTrans"; - - /**This stereochemistry element conveys R/S stereochemisty*/ + + /**This stereochemistry element conveys R/S stereochemistry*/ static final String R_OR_S_TYPE_VAL ="RorS"; - - /**This stereochemistry element conveys E/Z stereochemisty*/ + + /**This stereochemistry element conveys E/Z stereochemistry*/ static final String E_OR_Z_TYPE_VAL ="EorZ"; - + /**This group is a sulfur/selenium/tellurium acid with the acidic hydroxy missing*/ static final String CHALCOGENACIDSTEM_TYPE_VAL ="chalcogenAcidStem"; - + /**A subtractive prefix that removes a hydrogen to covert a hydroxy into a carbonyl or convert a bond to a double/triple bond*/ static final String DEHYDRO_TYPE_VAL ="dehydro"; - + /**A subtractive prefix that removes a terminal hydroxy like atom*/ static final String DEOXY_TYPE_VAL ="deoxy"; - + /**A functional group describing a divalent group*/ static final String DIVALENTGROUP_TYPE_VAL ="diValentGroup"; - + + /** This stereochemsitry element indicates the configuration of an amino acid/carbohydrate relative to glyceraldehyde*/ + static final String DLSTEREOCHEMISTRY_TYPE_VAL ="dlStereochemistry"; + + /**This stereochemistry element conveys endo/exo/syn/anti stereochemistry + * These indicate relative orientation of groups attached to non-bridgehead atoms in a bicyclo[x.y.z]alkane (x >= y > z > 0)*/ + 
static final String ENDO_EXO_SYN_ANTI_TYPE_VAL ="endoExoSynAnti"; + /**A group that is functional class e.g. O for anhydride*/ static final String FUNCTIONALCLASS_TYPE_VAL ="functionalClass"; - + /**A multiplier for groups of terms e.g. bis*/ static final String GROUP_TYPE_VAL ="group"; - + /**An implicit bracket. Implicit brackets are added where a bracket is needed to give the intended meaning*/ static final String IMPLICIT_TYPE_VAL ="implicit"; - + /**This suffix adds a radical to the preceding group e.g. yl, oyl*/ static final String INLINE_TYPE_VAL ="inline"; - + + /**An isotopeSpecification using IUPAC nomenclature*/ + static final String IUPACSYSTEM_TYPE_VAL ="iupacSystem"; + /**This functional group is monovalent e.g. alcohol*/ static final String MONOVALENTGROUP_TYPE_VAL ="monoValentGroup"; - + /**This functional group is monovalent and describes a specific compound e.g. cyanide*/ static final String MONOVALENTSTANDALONEGROUP_TYPE_VAL ="monoValentStandaloneGroup"; - + /**A non carboxylic acid e.g. phosphoric*/ static final String NONCARBOXYLICACID_TYPE_VAL ="nonCarboxylicAcid"; + /**This stereochemistry element describes the direction that plane polarised light is rotated*/ + static final String OPTICALROTATION_TYPE_VAL ="opticalRotation"; + /**Indicates the locant was made from an ortho/meta/para term*/ static final String ORTHOMETAPARA_TYPE_VAL ="orthoMetaPara"; - - /**This stereochemistry element conveys relative cis/trans stereochemisty e.g. r-1, c-2, t-3*/ + + /**This stereochemistry element conveys relative cis/trans stereochemistry e.g. 
r-1, c-2, t-3*/ static final String RELATIVECISTRANS_TYPE_VAL ="relativeCisTrans"; - + /**Indicates the group should be, at least in part, cyclic*/ static final String RING_TYPE_VAL ="ring"; - + /**Indicates a group that does not allow suffixes*/ static final String SIMPLEGROUP_TYPE_VAL ="simpleGroup"; - + /**Groups that do not have any special rules for suffix handling*/ static final String STANDARDGROUP_TYPE_VAL ="standardGroup"; - + /**A bracket containing R/S/E/Z descriptors*/ static final String STEREOCHEMISTRYBRACKET_TYPE_VAL ="stereochemistryBracket"; - + /**Indicates a group that is a substituent*/ static final String SUBSTITUENT_TYPE_VAL ="substituent"; - + /**A locant that also indicated the addition of hydrogen e.g.2(1H); not used to locant onto another group*/ static final String ADDEDHYDROGENLOCANT_TYPE_VAL ="addedHydrogenLocant"; - + /**Indicates a group that is a suffix*/ static final String SUFFIX_TYPE_VAL ="suffix"; - + /**A suffix that does not add a radical, hence will be present only on the root group */ static final String ROOT_TYPE_VAL ="root"; - + /**A multiplier for a Von Baeyer system e.g. bi in bicyclo*/ static final String VONBAEYER_TYPE_VAL ="VonBaeyer"; - - + + /* * The values the subType attribute can take * subType is expected to be present at minimum on all group elements */ + + /**The stem of an alkane e.g. "eth" */ static final String ALKANESTEM_SUBTYPE_VAL ="alkaneStem"; + /**An anhydride functional term e.g. "thioanhydride"*/ static final String ANHYDRIDE_SUBTYPE_VAL ="anhydride"; - static final String ARYLGROUP_SUBTYPE_VAL ="arylGroup"; + /**An aryl subsituent or stem e.g. "phenyl", "styr" */ static final String ARYLSUBSTITUENT_SUBTYPE_VAL ="arylSubstituent"; - /**Nucleotides/nucleosides/natural products*/ + //FIXME ideally carbohydrates and nucleotides/nucleosides/natural products should have a common type or subtype + /**Nucleotides/nucleosides/natural products. 
Carbohydrates can be detected by {@link XmlDeclarations#CARBOHYDRATE_TYPE_VAL}*/ static final String BIOCHEMICAL_SUBTYPE_VAL ="biochemical"; - /**A group representing a straight chain carbohydrate of a certain length with undefined stereochemistry e.g. hex in hexose */ - static final String SYSTEMATICCARBOHYDRATESTEMALDOSE_SUBTYPE_VAL ="systematicCarbohydrateStemAldose"; - /**A group representing a straight chain carbohydrate of a certain length with undefined stereochemistry e.g. hex in hex-2-ulose */ - static final String SYSTEMATICCARBOHYDRATESTEMKETOSE_SUBTYPE_VAL ="systematicCarbohydrateStemKetose"; - - /**A trivial carbohydrate stem */ + /**A trivial carbohydrate stem for an aldose e.g. "galact"*/ static final String CARBOHYDRATESTEMALDOSE_SUBTYPE_VAL ="carbohydrateStemAldose"; + /**A trivial carbohydrate stem for a ketose e.g. "fruct"*/ static final String CARBOHYDRATESTEMKETOSE_SUBTYPE_VAL ="carbohydrateStemKetose"; - - /**e.g. imide, lactam, sultam etc.*/ + /**A suffix that forms a cycle e.g. imide, lactam, sultam*/ static final String CYCLEFORMER_SUBTYPE_VAL ="cycleformer"; + /**A hydrocarbon stem that is typically followed by an unsaturator e.g. "adamant" */ static final String CYCLICUNSATURABLEHYDROCARBON_SUBTYPE_VAL ="cyclicUnsaturableHydrocarbon"; - /**amido/hydrazido/imido/nitrido*/ + /**Replacmenet terms that are not substituents e.g. amido/hydrazido/imido/nitrido*/ static final String DEDICATEDFUNCTIONALREPLACEMENTPREFIX_SUBTYPE_VAL = "dedicatedFunctionalReplacementPrefix"; + /**An atom e.g. "lithium" */ static final String ELEMENTARYATOM_SUBTYPE_VAL ="elementaryAtom"; + /**An amino acid that ends in an e.g. tryptoph */ static final String ENDINAN_SUBTYPE_VAL ="endInAn"; + /**An amino acid that ends in ic e.g. aspart */ static final String ENDINIC_SUBTYPE_VAL ="endInIc"; + /**An amino acid that ends in ine e.g. alan */ static final String ENDININE_SUBTYPE_VAL ="endInIne"; + /**A substituent that is expected to form a bridge e.g. 
"epoxy", "epiimino" */ static final String EPOXYLIKE_SUBTYPE_VAL ="epoxyLike"; + /**A ring that will be fused onto another ring e.g. "benzo", "pyrido", "pyridino" */ + static final String FUSIONRING_SUBTYPE_VAL ="fusionRing"; + /**A group that can be suffixed e.g. "hydrazin" */ static final String GROUPSTEM_SUBTYPE_VAL ="groupStem"; + /**A halide or pseudo halide e.g. "bromo", "cyano". Can be functional replacment terms when preceding certain non-carboxylic acids */ static final String HALIDEORPSEUDOHALIDE_SUBTYPE_VAL = "halideOrPseudoHalide"; + /**The stem of a hantzch Widman ring sytem e.g. "an", "ol", "olidin" */ static final String HANTZSCHWIDMAN_SUBTYPE_VAL ="hantzschWidman"; + /**A heteroatom hydride e.g. "az" "sulf" (will be followed by an unsaturator, may be preceded by a multiplier to form the heteroatom equivalent of alkanes)*/ static final String HETEROSTEM_SUBTYPE_VAL ="heteroStem"; - static final String FUSEDRING_SUBTYPE_VAL ="fusedRing"; - static final String FUSIONRING_SUBTYPE_VAL ="fusionRing"; + /**A group with no special properties Similar to: {@link XmlDeclarations#NONE_SUBTYPE_VAL}*/ static final String SIMPLEGROUP_SUBTYPE_VAL ="simpleGroup"; - static final String SUFFIX_SUBTYPE_VAL ="suffix"; + /**A substituent which intrinsically forms multiple bonds e.g. "siloxane", "thio" */ static final String MULTIRADICALSUBSTITUENT_SUBTYPE_VAL ="multiRadicalSubstituent"; + /**A non-carboxylic acid which cannot form a substituent e.g. "bor" */ static final String NOACYL_SUBTYPE_VAL ="noAcyl"; + /**A group with no special properties Similar to: {@link XmlDeclarations#SIMPLEGROUP_SUBTYPE_VAL}*/ static final String NONE_SUBTYPE_VAL ="none"; - /**oxido/sulfido/selenido/tellurido*/ + /**oxido/sulfido/selenido/tellurido These are handled similarly to oxide e.g. might give -[O-] or =O*/ static final String OXIDOLIKE_SUBTYPE_VAL ="oxidoLike"; + /**A term indicating replacement of all substitutable hydrogens by a halogen e.g. 
"perchloro" */ static final String PERHALOGENO_SUBTYPE_VAL ="perhalogeno"; + /** phospho and other very related substituents. Strongly prefer forming bonds to hydroxy groups */ static final String PHOSPHO_SUBTYPE_VAL ="phospho"; + /**A ring group e.g. "pyridin" */ + static final String RING_SUBTYPE_VAL ="ring"; + /** A component of a salt e.g "hydrate", "2HCl" */ + static final String SALTCOMPONENT_SUBTYPE_VAL ="saltComponent"; + /**A substitutent with no suffix e.g. "amino" */ static final String SIMPLESUBSTITUENT_SUBTYPE_VAL ="simpleSubstituent"; + /**A substituent expecting a suffix e.g."bor" "vin" */ static final String SUBSTITUENT_SUBTYPE_VAL ="substituent"; + /**A group representing a straight chain carbohydrate of a certain length with undefined stereochemistry e.g. hex in hexose */ + static final String SYSTEMATICCARBOHYDRATESTEMALDOSE_SUBTYPE_VAL ="systematicCarbohydrateStemAldose"; + /**A group representing a straight chain carbohydrate of a certain length with undefined stereochemistry e.g. hex in hex-2-ulose */ + static final String SYSTEMATICCARBOHYDRATESTEMKETOSE_SUBTYPE_VAL ="systematicCarbohydrateStemKetose"; + /**A suffix that attaches to the end of a chain e.g. "aldehyde", "ic acid" */ static final String TERMINAL_SUBTYPE_VAL ="terminal"; + /**An acid that when suffixed with yl gives an acyl group e.g. "acet" */ static final String YLFORACYL_SUBTYPE_VAL ="ylForAcyl"; + /**An acid that has undefined meaning when suffixed with yl */ static final String YLFORNOTHING_SUBTYPE_VAL ="ylForNothing"; + /**An acid that when suffixed with yl gives an alkyl group e.g. "laur" */ static final String YLFORYL_SUBTYPE_VAL ="ylForYl"; - //TODO java doc this - /**Requests that no labelling should be applied */ - static final String NONE_LABELS_VAL ="none";//TODO no labels attribute should probably mean no labelling + static final String NONE_LABELS_VAL ="none"; /**Requests that labelling be done like a fused ring. 
It is assumed that the order of the atoms is locant 1 as the first atom*/ static final String FUSEDRING_LABELS_VAL ="fusedRing"; - + /**Requests that labelling be 1, 2, 3 etc. It is assumed that the order of the atoms is locant 1 as the first atom*/ static final String NUMERIC_LABELS_VAL ="numeric"; @@ -458,17 +537,7 @@ * See suffixRules.dtd */ static final String SUFFIXRULES_RULE_EL = "rule"; - static final String SUFFIXRULES_ADDGROUP_EL = "addgroup"; - static final String SUFFIXRULES_ADDSUFFIXPREFIXIFNONEPRESENTANDCYCLIC_EL = "addSuffixPrefixIfNonePresentAndCyclic"; - static final String SUFFIXRULES_SETOUTATOM_EL = "setOutAtom"; - static final String SUFFIXRULES_CHANGECHARGE_EL = "changecharge"; - static final String SUFFIXRULES_ADDFUNCTIONALATOMSTOHYDROXYGROUPS_EL = "addFunctionalAtomsToHydroxyGroups"; - static final String SUFFIXRULES_CHARGEHYDROXYGROUPS_EL = "chargeHydroxyGroups"; - static final String SUFFIXRULES_REMOVETERMINALOXYGEN_EL = "removeTerminalOxygen"; - static final String SUFFIXRULES_CONVERTHYDROXYGROUPSTOOUTATOMS_EL = "convertHydroxyGroupsToOutAtoms"; - static final String SUFFIXRULES_CONVERTHYDROXYGROUPSTOPOSITIVECHARGE_EL = "convertHydroxyGroupsToPositiveCharge"; static final String SUFFIXRULES_VALUE_ATR = "value"; - static final String SUFFIXRULES_SUBTYPE_ATR = "value"; static final String SUFFIXRULES_SMILES_ATR = "SMILES"; static final String SUFFIXRULES_LABELS_ATR = "labels"; static final String SUFFIXRULES_FUNCTIONALIDS_ATR = "functionalIDs"; @@ -478,7 +547,8 @@ static final String SUFFIXRULES_OUTVALENCY_ATR = "outValency"; static final String SUFFIXRULES_CHARGE_ATR = "charge"; static final String SUFFIXRULES_PROTONS_ATR = "protons"; - + static final String SUFFIXRULES_ELEMENT_ATR = "element"; + /** * See suffixApplicability.dtd */ @@ -487,19 +557,4 @@ static final String SUFFIXAPPLICABILITY_TYPE_ATR = "type"; static final String SUFFIXAPPLICABILITY_VALUE_ATR = "value"; static final String SUFFIXAPPLICABILITY_SUBTYPE_ATR = "subType"; - - 
/** - * CML Elements/Attributes/NameSpace - */ - static final String CML_NAMESPACE = "http://www.xml-cml.org/schema"; - - /**A CML element used to hold atomParity information e.g. 1 or -1 and atomRefs4 */ - static final String CML_ATOMPARITY_EL = "atomParity"; - - /**A CML element used to hold bondStereo information e.g. C (cis) or T (trans) and atomRefs4 */ - static final String CML_BONDSTEREO_EL = "bondStereo"; - - /**An attribute holding the ids of the 4 atoms that are needed to define a bondStereo of a double bond or an atom parity. - * Note that the ids are prefixed with an 'a'*/ - static final String CML_ATOMREFS4_ATR = "atomRefs4"; } diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XOMFormatter.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XOMFormatter.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XOMFormatter.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XOMFormatter.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,55 +0,0 @@ -package uk.ac.cam.ch.wwmm.opsin; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; - -import nu.xom.Document; -import nu.xom.Element; -import nu.xom.Serializer; - -/**Turns a XOM Element into a pretty indented string. - * - * @author ptc24 - * - */ -public class XOMFormatter { - - private final ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - private Serializer serializer; - - /**Sets up a new XOMFormatter. - * - */ - public XOMFormatter() { - super(); - try { - serializer = new Serializer(outStream, "ISO-8859-1"); - serializer.setIndent(4); - serializer.setMaxLength(300); - } - catch (IOException ex) { - System.err.println(ex); - } - } - - /**Converts an Element to an indented string. - * - * @param elem The Element to convert to a string. - * @return The string. 
- */ - public String elemToString(Element elem) { - try { - // Grrr protected methods grrr - outStream.reset(); - // Put the element in a document... - serializer.write(new Document(new Element(elem))); - // Then return the document, destroying the evidence - // that it ever was a document. - return outStream.toString().substring(45); - } catch (IOException ex) { - ex.printStackTrace(); - } - return null; - } - -} diff -Nru opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XOMTools.java opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XOMTools.java --- opsin-1.5.0/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XOMTools.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/java/uk/ac/cam/ch/wwmm/opsin/XOMTools.java 1970-01-01 00:00:00.000000000 +0000 @@ -1,521 +0,0 @@ -package uk.ac.cam.ch.wwmm.opsin; - -import java.util.ArrayList; -import java.util.LinkedList; -import java.util.List; - -import nu.xom.Element; -import nu.xom.Elements; -import nu.xom.Node; -import nu.xom.ParentNode; -import nu.xom.Text; - -/** - * Accessory functions for the manipulation of XOM Nodes/Elements - * Only those that are necessary for OPSIN's execution - * @author ptc24 - * @author dl387 - */ -public final class XOMTools { - - /**Gets the next sibling of a given node. - * - * @param node The reference node. - * @return The next Sibling, or null. - */ - public static Node getNextSibling(Node node) { - ParentNode parent = node.getParent(); - int i = parent.indexOf(node); - if (i+1 >= parent.getChildCount()) return null; - return parent.getChild(i+1); - } - - /**Gets the first next sibling of a given node whose tagname matches the given string. - * - * @param current The reference node. - * @param tagName The tagname of a node to look for - * @return The matched next Sibling, or null. 
- */ - public static Node getNextSibling(Node current, String tagName) { - Element matchedElement =null; - while (true) { - Element next = (Element) getNextSibling(current); - if (next != null) { - if (next.getLocalName().equals(tagName)){ - matchedElement=next; - break; - } - else{ - current = next; - } - } else { - break; - } - } - return matchedElement; - } - - - /**Gets the previous sibling of a given node. - * - * @param node The reference node. - * @return The previous Sibling, or null. - */ - public static Node getPreviousSibling(Node node) { - ParentNode parent = node.getParent(); - int i = parent.indexOf(node); - if (i==0) return null; - return parent.getChild(i-1); - } - - - /**Gets the first previous sibling of a given node whose tagname matches the given string. - * - * @param current The reference node. - * @param tagName The tagname of a node to look for - * @return The matched previous Sibling, or null. - */ - public static Node getPreviousSibling(Node current, String tagName) { - Element matchedElement =null; - while (true) { - Element prev = (Element) getPreviousSibling(current); - if (prev != null) { - if (prev.getLocalName().equals(tagName)){ - matchedElement=prev; - break; - } - else{ - current = prev; - } - } else { - break; - } - } - return matchedElement; - } - - /**Inserts a node so that it occurs before a reference node. The new node - * must not currently have a parent. - * - * @param node The reference node. - * @param newNode The new node to insert. - */ - public static void insertBefore(Node node, Node newNode) { - ParentNode parent = node.getParent(); - int i = parent.indexOf(node); - parent.insertChild(newNode, i); - } - - /**Inserts a node so that it occurs after a reference node. The new node - * must not currently have a parent. - * - * @param node The reference node. - * @param newNode The new node to insert. 
- */ - public static void insertAfter(Node node, Node newNode) { - ParentNode parent = node.getParent(); - int i = parent.indexOf(node); - parent.insertChild(newNode, i+1); - } - - /** - * Gets the next node. This element need not be a sibling - * @param node: starting node - * @return - */ - public static Node getNext(Node node) { - Element parent = (Element) node.getParent(); - if (parent == null || parent.getLocalName().equals(XmlDeclarations.MOLECULE_EL)){ - return null; - } - int index = parent.indexOf(node); - if (index +1 >=parent.getChildCount()) return getNext(parent);//reached end of element - Element next =(Element) parent.getChild(index+1); - Elements children =next.getChildElements(); - while (children.size()!=0){ - next =children.get(0); - children =next.getChildElements(); - } - return next; - } - - /** - * Gets the previous node. This element need not be a sibling - * @param node: starting node - * @return - */ - public static Node getPrevious(Node node) { - Element parent = (Element) node.getParent(); - if (parent == null || parent.getLocalName().equals(XmlDeclarations.MOLECULE_EL)){ - return null; - } - int index = parent.indexOf(node); - if (index ==0) return getPrevious(parent);//reached beginning of element - Element previous =(Element) parent.getChild(index-1); - Elements children =previous.getChildElements(); - while (children.size()!=0){ - previous =children.get(children.size()-1); - children =previous.getChildElements(); - } - return previous; - } - - /** - * Sets the first text child of the group to the newName - * Throws an exception if the first child is not a Text node - * @param group - * @param newName - */ - public static void setTextChild(Element group, String newName){ - Node textNode =group.getChild(0); - if (textNode instanceof Text){ - ((Text)textNode).setValue(newName); - } - else{ - throw new IllegalArgumentException("No Text Child Found!"); - } - } - - /** - * Returns an arrayList containing sibling elements of the given type 
after the given element. - * These elements need not be continuous - * @param currentElem: the element to look for following siblings of - * @param type: the "localname" of the element type desired - * @return - */ - public static List getNextSiblingsOfType(Element currentElem, String type) { - List laterSiblingElementsOfType= new ArrayList(); - Element parent =(Element) currentElem.getParent(); - if (parent==null){ - return laterSiblingElementsOfType; - } - Elements potentialMatches =parent.getChildElements(type); - int indexOfCurrentElem =parent.indexOf(currentElem); - for (int i = 0; i < potentialMatches.size(); i++) { - if (parent.indexOf(potentialMatches.get(i)) > indexOfCurrentElem){ - laterSiblingElementsOfType.add(potentialMatches.get(i)); - } - } - return laterSiblingElementsOfType; - } - - /** - * Returns an arrayList containing sibling elements of the given type after the given element. - * @param currentElem: the element to look for following siblings of - * @param type: the "localname" of the element type desired - * @return - */ - public static List getNextAdjacentSiblingsOfType(Element currentElem, String type) { - List siblingElementsOfType= new ArrayList(); - Element parent =(Element) currentElem.getParent(); - if (parent==null){ - return siblingElementsOfType; - } - Element nextSibling = (Element) XOMTools.getNextSibling(currentElem); - while (nextSibling !=null && nextSibling.getLocalName().equals(type)){ - siblingElementsOfType.add(nextSibling); - nextSibling = (Element) XOMTools.getNextSibling(nextSibling); - } - - return siblingElementsOfType; - } - - /** - * Returns an arrayList containing sibling elements of the given types after the given element. 
- * These elements need not be continuous and are returned in the order encountered - * @param currentElem: the element to look for following siblings of - * @param types: An array of the "localname"s of the element types desired - * @return - */ - public static List getNextSiblingsOfTypes(Element currentElem, String[] types){ - List laterSiblingElementsOfTypes= new ArrayList(); - currentElem =(Element) getNextSibling(currentElem); - while (currentElem !=null){ - String name =currentElem.getLocalName(); - for (String type : types) { - if (name.equals(type)){ - laterSiblingElementsOfTypes.add(currentElem); - break; - } - } - currentElem =(Element) getNextSibling(currentElem); - } - return laterSiblingElementsOfTypes; - } - - /** - * Returns an arrayList containing sibling elements of the given type before the given element. - * These elements need not be continuous - * @param currentElem: the element to look for previous siblings of - * @param type: the "localname" of the element type desired - * @return - */ - public static List getPreviousSiblingsOfType(Element currentElem, String type) { - List earlierSiblingElementsOfType= new ArrayList(); - Element parent =(Element) currentElem.getParent(); - if (parent==null){ - return earlierSiblingElementsOfType; - } - Elements potentialMatches =parent.getChildElements(type); - int indexOfCurrentElem =parent.indexOf(currentElem); - for (int i = 0; i < potentialMatches.size(); i++) { - if (parent.indexOf(potentialMatches.get(i)) < indexOfCurrentElem){ - earlierSiblingElementsOfType.add(potentialMatches.get(i)); - } - } - return earlierSiblingElementsOfType; - } - - /** - * Gets the next sibling element of the given element. 
If this element's name is within the elementsToIgnore array this is repeated - * If no appropriate element can be found null is returned - * @param startingEl - * @param elementsToIgnore - * @return - */ - public static Element getNextSiblingIgnoringCertainElements(Element startingEl, String[] elementsToIgnore){ - ParentNode parent = startingEl.getParent(); - if (parent==null){ - return null; - } - int i = parent.indexOf(startingEl); - if (i+1 >= parent.getChildCount()) return null; - Element next =(Element)parent.getChild(i+1); - String elName =next.getLocalName(); - for (String namesToIgnore : elementsToIgnore) { - if (elName.equals(namesToIgnore)){ - return getNextSiblingIgnoringCertainElements(next, elementsToIgnore); - } - } - return next; - } - - - /** - * Gets the previous sibling element of the given element. If this element's name is within the elementsToIgnore array this is repeated - * If no appropriate element can be found null is returned - * @param startingEl - * @param elementsToIgnore - * @return - */ - public static Element getPreviousSiblingIgnoringCertainElements(Element startingEl, String[] elementsToIgnore){ - ParentNode parent = startingEl.getParent(); - if (parent==null){ - return null; - } - int i = parent.indexOf(startingEl); - if (i==0) return null; - Element previous =(Element)parent.getChild(i-1); - String elName =previous.getLocalName(); - for (String namesToIgnore : elementsToIgnore) { - if (elName.equals(namesToIgnore)){ - return getPreviousSiblingIgnoringCertainElements(previous, elementsToIgnore); - } - } - return previous; - } - - /** - * Finds all descendant elements whose localname matches the given elementName - * Equivalent to an xpath of type .//*[local-name() = 'elementName'] from the startingElement - * @param startingElement - * @param elementName - * @return - */ - public static List getDescendantElementsWithTagName(Element startingElement, String elementName) { - List matchingElements = new ArrayList(); - LinkedList stack 
= new LinkedList(); - Elements children =startingElement.getChildElements(); - for (int i = children.size() -1; i >= 0; i--) { - stack.add(children.get(i)); - } - while (stack.size()>0){ - Element currentElement =stack.removeLast(); - if (currentElement.getLocalName().equals(elementName)){ - matchingElements.add(currentElement); - } - children =currentElement.getChildElements(); - for (int i = children.size() -1; i >= 0; i--) { - Element child =children.get(i); - stack.add(child); - } - } - return matchingElements; - } - - /** - * Finds all descendant elements whose localname matches one of the strings in elementNames - * Equivalent to an xpath of type .//*[local-name() = 'elementName1']|.//*[local-name() = 'elementName2']|.//*[local-name() = 'elementName3'] from the startingElement - * @param startingElement - * @param elementNames - * @return - */ - public static List getDescendantElementsWithTagNames(Element startingElement, String[] elementNames) { - List matchingElements = new ArrayList(); - LinkedList stack = new LinkedList(); - Elements children =startingElement.getChildElements(); - for (int i = children.size() -1; i >= 0; i--) { - stack.add(children.get(i)); - } - while (stack.size()>0){ - Element currentElement =stack.removeLast(); - String currentElName=currentElement.getLocalName(); - for (String targetTagName : elementNames) { - if (currentElName.equals(targetTagName)){ - matchingElements.add(currentElement); - break; - } - } - children =currentElement.getChildElements(); - for (int i = children.size() -1; i >= 0; i--) { - Element child =children.get(i); - stack.add(child); - } - } - return matchingElements; - } - - /** - * Finds all child elements whose localname matches one of the strings in elementNames - * Equivalent to an xpath of type ./*[local-name() = 'elementName1']|./*[local-name() = 'elementName2']|./*[local-name() = 'elementName3'] from the startingElement - * @param startingElement - * @param elementNames - * @return - */ - public static 
List getChildElementsWithTagNames(Element startingElement, String[] elementNames) { - List matchingElements = new ArrayList(); - Elements children =startingElement.getChildElements(); - int childCount = children.size(); - for (int i = 0; i < childCount; i++) { - Element child =children.get(i); - String currentElName=child.getLocalName(); - for (String targetTagName : elementNames) { - if (currentElName.equals(targetTagName)){ - matchingElements.add(child); - break; - } - } - } - return matchingElements; - } - - /** - * Finds all child elements whose localname matches one of the strings in elementNames - * Equivalent to an xpath of type ./*[local-name() = 'elementName'] from the startingElement - * This is equivalent to XOM's getChildElements(String) other than returning an arrayList - * @param startingElement - * @param elementName - * @return - */ - public static List getChildElementsWithTagName(Element startingElement, String elementName) { - List matchingElements = new ArrayList(); - Elements children =startingElement.getChildElements(); - int childCount = children.size(); - for (int i = 0; i < childCount; i++) { - Element child =children.get(i); - String currentElName=child.getLocalName(); - if (currentElName.equals(elementName)){ - matchingElements.add(child); - } - } - return matchingElements; - } - - /** - * Finds all descendant elements whose localname matches the given elementName - * Additionally the element must have the specified attribute and the value of the attribute must be as specified - * Equivalent to an xpath of type .//*[local-name() = 'elementName'][@attribute="attributevalue"] from the startingElement - * @param startingElement - * @param elementName - * @return - */ - public static List getDescendantElementsWithTagNameAndAttribute(Element startingElement, String elementName, String attributeName, String attributeValue) { - List matchingElements = new ArrayList(); - LinkedList stack = new LinkedList(); - Elements children 
=startingElement.getChildElements(); - for (int i = children.size() -1; i >= 0; i--) { - stack.add(children.get(i)); - } - while (stack.size()>0){ - Element currentElement =stack.removeLast(); - if (currentElement.getLocalName().equals(elementName)){ - if (attributeValue.equals(currentElement.getAttributeValue(attributeName))){ - matchingElements.add(currentElement); - } - } - children =currentElement.getChildElements(); - for (int i = children.size() -1; i >= 0; i--) { - Element child =children.get(i); - stack.add(child); - } - } - return matchingElements; - } - - /** - * Finds all child elements whose localname matches the given elementName - * Additionally the element must have the specified attribute and the value of the attribute must be as specified - * Equivalent to an xpath of type ./*[local-name() = 'elementName'][@attribute="attributevalue"] from the startingElement - * @param startingElement - * @param elementName - * @return - */ - public static List getChildElementsWithTagNameAndAttribute(Element startingElement, String elementName, String attributeName, String attributeValue) { - List matchingElements = new ArrayList(); - Elements children =startingElement.getChildElements(); - for (int i = 0; i < children.size(); i++) { - Element child =children.get(i); - if (child.getLocalName().equals(elementName)){ - if (attributeValue.equals(child.getAttributeValue(attributeName))){ - matchingElements.add(child); - } - } - } - return matchingElements; - } - - /** - * Finds and returns the number of elements and the number of elements with no children, that are descendants of the startingElement - * The 0th position of the returned array is the total number of elements - * The 1st position is the number of child less elements - * @param startingElement - * @return - */ - public static int[] countNumberOfElementsAndNumberOfChildLessElements(Element startingElement) { - int[] counts = new int[2]; - LinkedList stack = new LinkedList(); - stack.add(startingElement); - 
while (stack.size()>0){ - Element currentElement =stack.removeLast(); - Elements children =currentElement.getChildElements(); - int childCount = children.size(); - if (childCount==0){ - counts[1]++; - } - else{ - for (int i = 0; i < childCount; i++) { - counts[0]++; - stack.add(children.get(i)); - } - } - } - return counts; - } - - /** - * Find all the later siblings of startingElement with the search terminating at the element with string tagName - * or if there are not more siblings - * @param startingEl - * @param tagName - * @return - */ - public static List getSiblingsUpToElementWithTagName(Element startingEl, String tagName) { - List laterSiblings = new ArrayList(); - Element nextEl = (Element) XOMTools.getNextSibling(startingEl); - while (nextEl !=null && !nextEl.getLocalName().equals(tagName)){ - laterSiblings.add(nextEl); - nextEl = (Element) XOMTools.getNextSibling(nextEl); - } - return laterSiblings; - } -} diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/opsinbuild.props opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/opsinbuild.props --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/opsinbuild.props 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/opsinbuild.props 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1 @@ +version=${project.version} \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/alkanes.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/alkanes.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/alkanes.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/alkanes.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,82 +1,68 @@ - - + - - meth - eth - prop - but - pent - hex - hept - oct - non - undec - - - hen - do - tri - tetr - pent - hex 
- hept - oct - non - - - dec - cos - icos - eicos - triacont - tricont - tetracont - pentacont - hexacont - heptacont - octacont - nonacont - decacont - - - hect - dict - trict - tetract - pentact - hexact - heptact - octact - nonact - - - kili - dili - trili - tetrali - pentali - hexali - heptali - octali - nonali - - - - normal - normal- - tertiary- - tertiary - tert - tert. - tert- - tert.- - t- - iso - iso- - sec- - sec - neo - neo- - - \ No newline at end of file + + meth + eth + prop + but + pent + hex + hept + oct + non + undec + + + hen + do + tri + tetr + pent + hex + hept + oct + non + + + dec + cos|icos|eicos + triacont|tricont + tetracont + pentacont + hexacont + heptacont + octacont + nonacont + decacont + + + hect + dict + trict + tetract + pentact + hexact + heptact + octact + nonact + + + kili + dili + trili + tetrali + pentali + hexali + heptali + octali + nonali + + + + normal|normal |normal- + tertiary|tertiary |tertiary-|tert|tert.|tert-|tert.-|t- + iso|iso- + sec|sec.|sec-|sec.-|secondary|secondary |secondary- + neo|neo- + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/aminoAcids.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/aminoAcids.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/aminoAcids.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/aminoAcids.xml 2017-07-23 20:55:18.000000000 +0000 @@ -6,263 +6,257 @@ - + + - - alan - dehydroalan - alpha,beta-dehydroalan - argin - asparag - isoasparag - alpha-asparag - cystathion - isoglutam - iso-glutam - glyc - histid - isoleuc - iso-leuc - alloisoleuc - allo-isoleuc - leuc - lys - methion - selenomethion - phenylalan - dehydrophenylalan - alpha,beta-dehydrophenylalan - prol - ser - threon - allothreon - allo-threon - tyros - val - norval - norleuc - allys - citrull - ornith - sarcos - thyron - thyrox - pyrrolys - 
lanthion - hydroxyprol - hydroxylys - hadacid - isoser - iso-ser - abr - agarit - azaser - alanos - albizz - albizzi - alli - ethion - selenoethion - canal - canavan - cycloleuc - isoval - iso-val - penicillam - thean - felin - azidonorleuc - azidohomoalan - tertleuc - tert-leuc - t-leuc - panton - lamin - saccharop - tertbutylalan - tert-butylalan - t-butylalan - tertbutylglyc - tert-butylglyc - t-butylglyc + + alan + dehydroalan|alpha,beta-dehydroalan + argin + asparag + isoasparag|alpha-asparag + cystathion + isoglutam|iso-glutam + glyc + histid + isoleuc|iso-leuc + alloisoleuc|allo-isoleuc + leuc + lys + methion + selenomethion|seleno-methion + telluromethion|telluro-methion + phenylalan + dehydrophenylalan|alpha,beta-dehydrophenylalan + prol + ser + threon + allothreon|allo-threon + tyros + val + norval + norleuc + allys + citrull + ornith + sarcos + thyron + thyrox + pyrrolys + lanthion + hydroxyprol + hydroxylys + hadacid + isoser|iso-ser + agarit + azaser + alanos + albizz|albizzi + alli + ethion + selenoethion|seleno-ethion + telluroethion|telluro-ethion + canal + canavan + cycloleuc + isoval|iso-val + penicillam + phenylglyc + thean + felin + azidolys + azido-lys|azidonorleuc|azido-norleuc + azidohomoalan|azido-homoalan + azidophenylalan|azido-phenylalan + monoiodotyros|mono-iodotyros + diiodotyros|di-iodotyros + triiodothyron|tri-iodothyron + tertiaryleuc|tertiary-leuc|tertleuc|tert-leuc|tert.leuc|tert.-leuc|t-leuc + thiaprol|thioprol + panton + lamin + saccharop + tertiarybutylalan|tertiary-butylalan|tertbutylalan|tert-butylalan|tert.butylalan|tert.-butylalan|t-butylalan + tertiarybutylglyc|tertiary-butylglyc|tertbutylglyc|tert-butylglyc|tert.butylglyc|tert.-butylglyc|t-butylglyc + propargylglyc + allylglyc + diphenylalan - homoalan - homoargin - homocitrull - homomethion - homolanthion - homoleuc - homophenylalan - homopropargylglyc - homoser - homotyros + homoalan + homoargin + homocitrull + homomethion + homolanthion + homoleuc + homophenylalan + 
homoprol + homopropargylglyc + homoser + homotyros - beta-alan - beta-homoalan - beta-homoargin - beta-homoasparag - beta-homohydroxyprol - beta-homoisoleuc - beta-homoleuc - beta-homolys - beta-homomethion - beta-homophenylalan - beta-homoprol - beta-homopropargylglyc - beta-homoser - beta-homothreon - beta-homotyros - beta-leuc - beta-phenylalan - + beta-alan + beta-homoalan + beta-homoargin + beta-homoasparag + beta-homohydroxyprol + beta-homoisoleuc + beta-homoleuc + beta-homolys + beta-homomethion + beta-homophenylalan + beta-homoprol + beta-homopropargylglyc + beta-homoser + beta-homothreon + beta-homotyros + beta-leuc + beta-phenylalan + - - alpha-glutam - glutam - beta-glutam - homoglutam - beta-homoglutam - cyste - homocyste - selenocyste - selenohomocyste - - - - tryptoph - homotryptoph - beta-homotryptoph - - - - aspart - homoaspart - glutam - homoglutam - beta-homoglutam - beta-glutam - beta-homoaspart - cyste - homocyste - selenocyste - pantothen - pyroglutam - - - - aspart-1-yl - l-aspart-1-yl - d-aspart-1-yl - alpha-aspartyl - l-alpha-aspartyl - alpha-l-aspartyl - d-alpha-aspartyl - alpha-d-aspartyl - aspart-4-yl - l-aspart-4-yl - d-aspart-4-yl - beta-aspartyl - l-beta-aspartyl - beta-l-aspartyl - d-beta-aspartyl - beta-d-aspartyl - glutam-1-yl - l-glutam-1-yl - d-glutam-1-yl - alpha-glutamyl - l-alpha-glutamyl - alpha-l-glutamyl - d-alpha-glutamyl - alpha-d-glutamyl - glutam-5-yl - l-glutam-5-yl - d-glutam-5-yl - gamma-glutamyl - l-gamma-glutamyl - gamma-l-glutamyl - d-gamma-glutamyl - gamma-d-glutamyl - - cystyl - l-cystyl - d-cystyl - half-cystyl - l-half-cystyl - d-half-cystyl - - - - aspartate(2-) - aspartate(1-) - glutamate(2-) - glutamate(1-) - lysinium(1+) - lysinium(2+) - - aspart-1-al - aspart-4-al - aspart-1-ol - aspart-4-ol - - glutam-1-al - glutam-4-al - glutam-1-ol - glutam-4-ol - - arginamide - arginate - homoarginamide - homoarginate - methionine sulfoxide - pantothenol - panthenol - pidolic acid - pidolicacid - pidolate - 
pyroglutamide - - cystine - dopa - homocystine - selenocystine - - - - alanopine - beta-alanopine - butyrine - ciliatine - cystamine - cysteamine - glutathione disulfide - glutathionedisulfide - methioninamine - octopinic acid - octopinicacid - octopine - selenocystamine - selenocysteamine - hypotaurine - nopaline - selenohypotaurine - strombine - taurine - s-adenosylmethionine - s-adenosyl-l-methionine - s-adenosyl-d-methionine - - selenotaurine - tauropine - tricine - isolysine - beta-lysine - lysopine - d-lysopine - pantetheine - statine - + + alpha-glutam + glutam + beta-glutam + homoglutam + beta-homoglutam + cyste + homocyste + selenocyste|seleno-cyste + selenohomocyste|seleno-homocyste + tellurocyste|telluro-cyste + tellurohomocyste|telluro-homocyste + + + + tryptoph + homotryptoph + beta-homotryptoph + + + + aspart + homoaspart|glutam + homoglutam + beta-homoglutam + beta-glutam|beta-homoaspart + carboxyglutam + cyste + homocyste + selenocyste|seleno-cyste + tellurocyste|telluro-cyste + pantothen + pyroglutam + + + + aspart-1-yl|l-aspart-1-yl|alpha-aspartyl|l-alpha-aspartyl|alpha-l-aspartyl + d-aspart-1-yl|d-alpha-aspartyl|alpha-d-aspartyl + aspart-4-yl|l-aspart-4-yl|beta-aspartyl|l-beta-aspartyl|beta-l-aspartyl + d-aspart-4-yl|d-beta-aspartyl|beta-d-aspartyl + glutam-1-yl|l-glutam-1-yl|alpha-glutamyl|l-alpha-glutamyl|alpha-l-glutamyl + d-glutam-1-yl|d-alpha-glutamyl|alpha-d-glutamyl + glutam-5-yl|l-glutam-5-yl|gamma-glutamyl|l-gamma-glutamyl|gamma-l-glutamyl + d-glutam-5-yl|d-gamma-glutamyl|gamma-d-glutamyl + + cystyl|l-cystyl + d-cystyl + half-cystyl|l-half-cystyl + d-half-cystyl + tryptyl|l-tryptyl + d-tryptyl + + + + aspartate(2-) + aspartate(1-) + glutamate(2-) + glutamate(1-) + lysinium(1+) + lysinium(2+) + + aspart-1-al + aspart-4-al + aspart-1-ol + aspart-4-ol + + glutam-1-al + glutam-4-al + glutam-1-ol + glutam-4-ol + + arginamide|arginamid + arginate|arginat + homoarginamide|homoarginamid + homoarginate|homoarginat + methionine 
sulfoxide|methioninesulfoxide|methionin sulfoxid|methionin-sulfoxid|methioninsulfoxid|methionine oxide|methionineoxide|methionin oxid|methionin-oxid|methioninoxid + pantothenol|panthenol + pidolic acid|pidolicacid + pidolate|pidolat + pyroglutamal + pyroglutamol + pyroglutamide|pyroglutamid + selenomethionine selenoxide|selenomethionineselenoxide|selenomethionin selenoxid|selenomethionin-selenoxid|selenomethioninselenoxid|selenomethionine oxide|selenomethionineoxide|selenomethionin oxid|selenomethionin-oxid|selenomethioninoxid + telluromethionine telluroxide|telluromethioninetelluroxide|telluromethionin telluroxid|telluromethionin-telluroxid|telluromethionintelluroxid|telluromethionine oxide|telluromethionineoxide|telluromethionin oxid|telluromethionin-oxid|telluromethioninoxid + + abrine + cystine|cystin + cystinate|cystinat + dopa + homocystine|homocystin + homocystinate|homocystinat + selenocystine|seleno-cystine|selenocystin|seleno-cystin + selenocystinate|seleno-cystinate|selenocystinat|seleno-cystinat + tellurocystine|telluro-cystine|tellurocystin|telluro-cystin + tellurocystinate|telluro-cystinate|tellurocystinat|telluro-cystinat + + + + alanopine|alanopin + beta-alanopine|beta-alanopin + butyrine + carnitine|carnitin + l-carnitine|l-carnitin + d-carnitine|d-carnitin + ciliatine + creatine|creatin + cystamine|cystamin + cysteamine|cysteamin + diaminopimelic acid|d,l-diaminopimelic acid|meso-diaminopimelic acid + l,l-diaminopimelic acid|ll-diaminopimelic acid|l-diaminopimelic acid + d,d-diaminopimelic acid|dd-diaminopimelic acid|d-diaminopimelic acid + dibromotyrosine|dibromotyrosin + dihydroxyphenylglycine|dihydroxyphenylglycin + glutathione disulfide|glutathionedisulfide|glutathion disulfid|glutathion-disulfid|glutathiondisulfid + guvacine|guvacin + hypotaurocyamine|hypotaurocyamin + methioninamine|methioninamin + methylselenocysteine|methylselenocystein + octopinic acid|octopinicacid + octopine|octopin + 
selenocystamine|seleno-cystamine|selenocystamin|seleno-cystamin + selenocysteamine|seleno-cysteamine|selenocysteamin|seleno-cysteamin + hypotaurine|hypotaurin + nopaline + selenohypotaurine|seleno-hypotaurine|selenohypotaurin|seleno-hypotaurin + strombine + taurine|taurin + selenotaurine|seleno-taurine|selenotaurin|seleno-taurin + taurocyamine|taurocyamin + tauropine|tauropin + tetrazolylglycine|tetrazolylglycin + tricine + isolysine|beta-lysine|isolysin|beta-lysin + lysopine|d-lysopine|lysopin|d-lysopin + d-methionine (s)-sulfoxide|d-methionine-(s)-sulfoxide|d-methionine-s-sulfoxide|d-methionin-(s)-sulfoxid|d-methionin-s-sulfoxid|d-methionine (s)-s-oxide|d-methionine-(s)-s-oxide|d-methionin-(s)-s-oxid + d-methionine (r)-sulfoxide|d-methionine-(r)-sulfoxide|d-methionine-r-sulfoxide|d-methionin-(r)-sulfoxid|d-methionin-r-sulfoxid|d-methionine (r)-s-oxide|d-methionine-(r)-s-oxide|d-methionin-(r)-s-oxid + methionine (s)-sulfoxide|methionine-(s)-sulfoxide|methionine-s-sulfoxide|methionin-(s)-sulfoxid|methionin-s-sulfoxid|methionine (s)-s-oxide|methionine-(s)-s-oxide|methionin-(s)-s-oxid|l-methionine (s)-sulfoxide|l-methionine-(s)-sulfoxide|l-methionine-s-sulfoxide|l-methionin-(s)-sulfoxid|l-methionin-s-sulfoxid|l-methionine (s)-s-oxide|l-methionine-(s)-s-oxide|l-methionin-(s)-s-oxid + methionine (r)-sulfoxide|methionine-(r)-sulfoxide|methionine-r-sulfoxide|methionin-(r)-sulfoxid|methionin-r-sulfoxid|methionine (r)-s-oxide|methionine-(r)-s-oxide|methionin-(r)-s-oxid|l-methionine (r)-sulfoxide|l-methionine-(r)-sulfoxide|l-methionine-r-sulfoxide|l-methionin-(r)-sulfoxid|l-methionin-r-sulfoxid|l-methionine (r)-s-oxide|l-methionine-(r)-s-oxide|l-methionin-(r)-s-oxid + pantetheine|pantethein + statine + trimethylglycine|trimethylglycin + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/arylGroups.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/arylGroups.xml --- 
opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/arylGroups.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/arylGroups.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,367 +1,401 @@ - - - - - - 1-pyrindan - 1-pyrinden - 1-pyrindin - 2-pyrindan - 2-pyrinden - 2-pyrindin - aceanthren - aceanthrylen - acenaphthylen - acenaphthen - acenaphthoquinon - acephenanthren - acephenanthrylen - acridan - acridarsin - acridin - acridophosphin - acrindolin - adrenalin - alloxan - amphetamin - anethol - anilin - anisidin - anisol - anthracen - anthraquinon - anthradiquinon - anthrazin - anthyridin - arsanthren - arsanthridin - arsindol - arsindolin - arsindolizin - arsinolin - arsinolizin - as-indacen - azulen - benzanthron - benzidin - 4,4'-benzidin - benzen - benzoin - benzoquinon - benzyn - betacarbolin - beta-carbolin - b-carbolin - bibenzyl - biphenylen - boranthren - borneol - caffein - carbazol - camphen - camphorquinon - catechol - chalcon - cholanthren - chroman - thiochroman - selenochroman - tellurochroman - chromen - thiochromen - selenochromen - tellurochromen - chromenylium - thiochromenylium - selenochromenylium - tellurochromenylium - chromon - chrysen - cinnolin - collidin - coumarin - coronen - cresol - cumen - cymen - cyclopenta[a]phenanthren - decalin - dopamin - duren - ethylenimin - eugenol - flavylium - fluoranthen - fluoren - fulven - furan - thiofuran - selenofuran - tellurofuran - furazan - guaiacol - harmalin - harmalol - harman - harmanamid - harmin - hemimelliten - histamin - homopiperazin - homopiperidin - hydantoin - hydrobenzoin - hydroquinon - imidazol - imidazolidin - imidazolin - indan - indazol - inden - indol - indolin - indolizin - isatin - isoarsindol - isoarsinolin - isobenzofuran - isobenzothiofuran - isobenzothiophen - isochroman - isothiochroman - isoselenochroman - isotellurochroman - isochromen - isothiochromen - isoselenochromen - 
isotellurochromen - isochromenylium - isothiochromenylium - isoselenochromenylium - isotellurochromenylium - isocoumarin - isoduren - isoguanin - isoindol - isoindolin - isoquinolin - isoquinolon - isophosphindol - isophosphinolin - isosafrol - isoselenazol - isoselenazolidin - isoselenazolin - isotellurazol - isotellurazolidin - isotellurazolin - isothiazol - isothiazolidin - isothiazolin - isoxazol - isooxazol - isoxazolidin - isooxazolidin - isoxazolin - isooxazolin - isoviolanthren - lepidin - lupetidin - lutidin - menthol - mercuranthren - mesitylen - melliten - morpholin - thiomorpholin - selenomorpholin - telluromorpholin - naphthacen - naphthalen - naphthoquinon - naphthodiquinon - naphthyridin - norbornen - norcamphen - norbornylen - norharmin - ovalen - oxanthren - oxindol - paracetamol - paraxanthin - perimidin - perylen - phenalen - phenanthrazin - phenanthren - phenanthridin - phenanthrolin - phenarsazin - phenazin - phenetidin - phenetol - phenoxid - phenoxylium - phenomercurin - phenoxazin - phenothiazin - phenoselenazin - phenotellurazin - phenophosphazinin - phenophosphazin - phenarsazinin - phenazasilin - phenoarsazin - phenomercurazin - phenomercazin - phenoxathiin - phenoxaselenin - phenoxasilin - phenoxatellurin - phenoxaphosphinin - phenoxaphosphin - phenoxarsinin - phenoxarsin - phenoxastibinin - phenoxantimonin - phenothiarsinin - phenothiarsin - phloroglucinol - phosphanthren - phosphanthridin - phosphindol - phosphindolizin - phosphinolin - phosphinolizin - phthalazin - phthalid - phthaloperin - pinolin - picolin - piperazin - piperidin - picen - pleiaden - prehniten - pseudocumen - pteridin - purin - pyran - thiopyran - selenopyran - telluropyran - pyranthren - pyrazin - pyrazol - pyrazolidin - pyrazolin - pyren - pyridazin - pyridin - pyrimidin - 1-pyrinden - 2-pyrinden - pyrocatechol - pyrogallol - pyrrol - pyrrolizin - pyrrolidin - pyrrolidon - pyrrolin - pyrylium - quinaldin - quinazolin - quindolin - quinindolin - quinolin - 
quinolizin - quinolon - quinoxalin - quinuclidin - resorcinol - rhodanin - rubicen - safrol - s-indacen - s-triazin - sym-triazin - s-triazol - sym-triazol - silanthren - skatol - stilben - sulfolan - styren - selenanthren - selenophen - telluranthren - tellurophen - tetralin - tetralon - thebenidin - theobromin - theophyllin - thianthren - thiophen - tolan - toluen - toluidin - tosylat - trinden - trinaphthylen - triphenodioxazin - triphenodithiazin - triphenylen - tritan - tyramin - tryptamin - tryptolin - urazol - veratrol - xanthen - thioxanthen - selenoxanthen - telluroxanthen - xanthylium - violanthren - xylen - xylidin - - - - - adenin - cytosin - guanin - hypoxanthin - thymin - uracil - xanthin - - adenosin - cytidin - guanosin - inosine - thymidin - uridin - xanthosin - - nucleocidin - idoxuridin - ribosylthymin - orotidin - pseudouridin - - - bilin - corrin - porphyrin - porphin - flavan - isoflavan - neoflavan - flavon - flavanon - - \ No newline at end of file + + + + + aceanthren + aceanthrylen + acenaphthylen + acenaphthen + acenaphthoquinon + acephenanthren + acephenanthrylen + acridan + acridarsin + acridin + acridophosphin + acrindolin + adrenalin + alloxan + amphetamin + anethol + anilin + anisidin + anisol + anthracen + anthraquinon + anthradiquinon + anthrazin + anthyridin + arsanthren + arsanthridin + arsindol + arsindolin + arsindolizidin + arsindolizin + arsinolin + arsinolizin + as-indacen + as-triazin|asym-triazin + azulen + benzanthron + benzidin|4,4'-benzidin + benzen + benzoin + benzoquinon + benzotribromid + benzotrichlorid + benzotrifluorid + benzotriiodid + benzyn + betacarbolin|beta-carbolin|b-carbolin + bibenzyl + biphenylen + boranthren + borneol + caffein + camphen + camphorquinon + carbazol + carbostyril + catechol + chalcon + chinolin + cholanthren + chroman + thiochroman + selenochroman + tellurochroman + chromen + chrom-2-en + chrom-3-en + thiochromen + selenochromen + tellurochromen + chromenylium + thiochromenylium + 
selenochromenylium + tellurochromenylium + chromocen + chromon + chrysen + cinnolin + cobaltocen + collidin + coumaran + coumarin|cumarin + coumaron + coronen + cresol + cumen + cymen + cyclopenta[a]phenanthren + decalin + dopamin + duren + ethylenimin + eugenol + ferrocen + flavylium + fluoranthen + fluoren + spiro-9,9'-bifluoren + fulven + furan + thiofuran + selenofuran + tellurofuran + furazan + furoxan + guaiacol + harmalin + harmalol + harman + harmanamid + harmin + hemimelliten + histamin + homomorpholin + thiahomomorpholin|thiohomomorpholin + selenohomomorpholin + tellurohomomorpholin + homopiperazin + homopiperidin + hydantoin + hydrobenzoin + hydroquinon + imidazol + imidazolidin + imidazolin + indan + ind-1-en + ind-2-en + indazol + inden + indol + indolin + indolizidin + indolizin + indoxazen + isatin + isoarsindol + isoarsindolin + isoarsinolin + isobenzofuran + isobenzothiofuran|isobenzothiophen + isocarbostyril + isochinolin + isochroman + isothiochroman + isoselenochroman + isotellurochroman + isochromen + isothiochromen + isoselenochromen + isotellurochromen + isochromenylium + isothiochromenylium + isoselenochromenylium + isotellurochromenylium + isocoumarin|isocumarin + isoduren + isoguanin + isoindol + isoindolin + isoquinolin + isoquinolon + isophosphindol + isophosphindolin + isophosphinolin + isosafrol + isoselenazol + isoselenazolidin + isoselenazolin + isotellurazol + isotellurazolidin + isotellurazolin + isothiazol + isothiazolidin + isothiazolin + isoxazol|isooxazol + isoxazolidin|isooxazolidin + isoxazolin|isooxazolin + isoviolanthren + lepidin + lupetidin + menthol + mercuranthren + mesitylen + melliten + molybdocen + morpholin + thiamorpholin|thiomorpholin + selenomorpholin + telluromorpholin + naphthacen + naphthalen|naphthalin + naphthoquinon + naphthodiquinon + naphthyridin + nickelocen + niobocen + norbornen|norcamphen|norbornylen + norharmin + osmocen + ovalen + oxanthren + oxindol + paracetamol + paraxanthin + perimidin + perylen 
+ phenalen + phenanthrazin + phenanthren + phenanthridin + phenanthrolin + phenarsazin + phenazin + phenetidin + phenetol + phenoxid + phenoxylium + phenomercurin + phenoxazin + phenothiazin + phenoselenazin + phenotellurazin + phenophosphazinin|phenophosphazin + phenarsazinin + phenazasilin + phenoarsazin + phenomercurazin|phenomercazin + phenoxathiin + phenoxaselenin + phenoxasilin + phenoxatellurin + phenoxaphosphinin|phenoxaphosphin + phenoxarsinin|phenoxarsin + phenoxastibinin|phenoxantimonin + phenothiarsinin|phenothiarsin + phloroglucinol + phosphanthren + phosphanthridin + phosphindol + phosphindolin + phosphindolizidin + phosphindolizin + phosphinolin + phosphinolizin + phthalazin + phthalid + phthaloperin + piaselenol|piazselenol + piazthiol + pinolin + picolin + piperazin + piperidin + picen + pleiaden + plumbocen + prehniten + pseudocumen + pteridin + purin + pyran + thiopyran + selenopyran + telluropyran + pyranthren + pyrazin + pyrazol + pyrazolidin + pyrazolin + pyren + pyridazin + pyridin + pyrimidin + pyrindan + pyrinden|pyrindin + pyrocatechol + pyrogallol + pyrrol + pyrrolizidin + pyrrolizin + pyrrolidin + pyrrolidon + pyrrolin + pyrylium + quinaldin + quinazolin + quindolin + quinindolin + quinolin + quinolizidin + quinolizin + quinolon + quinoxalin + quinuclidin + resorcinol + rhodanin + rhodocen + rubicen + ruthenocen + safrol + s-indacen + s-triazin|sym-triazin + s-triazol|sym-triazol + silanthren + skatol + stilben + sulfolan + sulfol-2-en + sulfol-3-en + sulfolen + styren + selenanthren + selenophen + telluranthren + tellurophen + tetralin + tetralon + thebenidin + theobromin + theophyllin + thianthren + thiophen + titanocen + tolan + toluen + toluidin + trinden + trinaphthylen + triphenodioxazin + triphenodithiazin + triphenylen + tritan + tyramin + tryptamin + tryptolin + uranocen + urazol + v-triazin + vanadocen + veratrol + xanthen + thioxanthen + selenoxanthen + telluroxanthen + xanthylium + violanthren + xylen + xylidin + zirconocen + 
+ + + + adenin + cytosin + guanin + hypoxanthin + thymin + uracil + xanthin + + adenosin + cytidin + guanosin + 7-methylguanosin|n7-methylguanosin|7-methyl-guanosin|n7-methylguanosin + inosin + thioinosin + thymidin|deoxythymidin + uridin + xanthosin + + nucleocidin + idoxuridin + ribosylthymin + orotidin + pseudouridin + wybutosin + + + bilin + corrin + dihydrophloroglucinol + porphyrin|porphin + flavan + flavan-2-en|flav-2-en + flavan-3-en|flav-3-en + isoflavan|iso-flavan + isoflavan-2-en|isoflav-2-en|iso-flavan-2-en|iso-flav-2-en + isoflavan-3-en|isoflav-3-en|iso-flavan-3-en|iso-flav-3-en + isoflavon|iso-flavon + neoflavan|neo-flavan + neoflavan-2-en|neoflav-2-en|neo-flavan-2-en|neo-flav-2-en + neoflavan-3-en|neoflav-3-en|neo-flavan-3-en|neo-flav-3-en + flavon + flavanon + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/arylSubstituents.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/arylSubstituents.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/arylSubstituents.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/arylSubstituents.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,132 +1,133 @@ - - - - - - - imidaz - indaz - isobenzothioph - isothiaz - isoxaz - isooxaz - pheneth - pyrr - styr - tol - - - - phenyl - - - - - acenaphth - acrid - acridars - anthrac - anthr - arsanthr - arsanthrid - as-indac - azul - benzhydr - boranthr - chrys - cinnol - coron - cyclopenta[a]phenanthr - cym - fluoranth - fulv - fur - thiofur - selenofur - tellurofur - furfur - imidazolid - indoliz - isobenzofur - isobenzothiofur - isoquinol - isoselenaz - isotelluraz - isoxazolid - mesit - morphol - thiomorphol - selenomorphol - telluromorphol - naphthac - naphthal - naphth - naphthyrid - oval - oxanthr - perimid - phenal - phenanthr - phenanthrid - phenanthrol - phenarsaz - phenaz - phenoxaz - phenothiaz - 
phenoselenaz - phenotelluraz - phenophosphaz - phenoarsaz - phenomercuraz - phenomercaz - phenoxaselen - phenoxatellur - phenoxaphosph - phenoxars - phenoxastibin - phenoxantimon - phenothiars - phosphanthr - piperaz - piperid - pleiad - pterid - pur - pyr - thiopyr - selenopyr - telluropyr - pyranthr - pyrazolid - pyridaz - pyrid - pyrimid - pyrroliz - pyrrolid - quinazol - quinol - quinoliz - quinoxal - quinuclid - rubic - s-indac - selenanthr - selenoph - silanthr - stilb - telluranthr - telluroph - thianthr - thioph - thien - thiene - tolu - xanth - thioxanth - selenoxanth - telluroxanth - - \ No newline at end of file + + + + + + imidaz + indaz + isobenzothioph + isothiaz + isoxaz|isooxaz + phenmeth + pheneth + phenprop + phenisoprop + phenbut + phenpent + phenhex + pyrr + styr + tol + + + + phenyl + + + + + acenaphth + acrid + acridars + anthrac + anthr + arsanthr + arsanthrid + as-indac + azul + benzhydr + boranthr + chrys + cinnol + coron + cyclopenta[a]phenanthr + cym + fluoranth + fulv + fur + thiofur + selenofur + tellurofur + furfur + imidazolid + indoliz + isobenzofur + isobenzothiofur + isoquinol + isoselenaz + isotelluraz + isoxazolid + mesit + morphol + thiamorphol|thiomorphol + selenomorphol + telluromorphol + naphthac + naphthal + naphth + naphthyrid + oval + oxanthr + perimid + phenal + phenanthr + phenanthrid + phenanthrol + phenarsaz + phenaz + phenoxaz + phenothiaz + phenoselenaz + phenotelluraz + phenophosphaz + phenoarsaz + phenomercuraz|phenomercaz + phenoxaselen + phenoxatellur + phenoxaphosph + phenoxars + phenoxastibin|phenoxantimon + phenothiars + phosphanthr + piperaz + piperid + pleiad + pterid + pur + pyr + thiopyr + selenopyr + telluropyr + pyranthr + pyrazolid + pyridaz + pyrid + pyrimid + pyrroliz + pyrrolid + quinazol + quinol + quinoliz + quinoxal + quinuclid + rubic + s-indac + selenanthr + selenoph + silanthr + stilb + telluranthr + telluroph + thianthr + thioph + thien|thiene + tolu + xanth + thioxanth + selenoxanth + 
telluroxanth + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/atomHydrides.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/atomHydrides.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/atomHydrides.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/atomHydrides.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,42 +1,41 @@ - - - - - - bor - alum - indig - gall - thall - - carb - sil - germ - stann - plumb - - az - phosph - phosphor - ars - arsor - stib - stibor - bismuth - - oxid - sulf - sel - tell - pol - - fluor - chlor - brom - iod - astat - - - az - - \ No newline at end of file + + + + + bor + alum + indig + gall + thall + + carb + sil + germ + stann + plumb + + az + phosph + phosphor + ars + arsor + stib + stibor + bismuth + + oxid + sulf + sel + tell + pol + + fluor + chlor + brom + iod + astat + + + az + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carbohydrateSuffixes.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carbohydrateSuffixes.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carbohydrateSuffixes.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carbohydrateSuffixes.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,88 +1,90 @@ - - - - - - ide - id - - - - - uronamide - uronate - uronic acid - uronicacid - urononitrile - onamide - onate - onic acid - onicacid - ononitrile - - - - - ose - os - - - - - arate - arate(2-) - aric acid - aricacid - onamide - onate - onic acid - onicacid - ononitrile - osonamide - osonate - osonic acid - osonicacid - osononitrile - uronamide - uronate - uronic acid - uronicacid - urononitrile - - - - - uron - - - - - odiald - - - - - itol - - - - - ul - - - - - ul - - - - - yl - - 
- - - onoyl - uronoyl - - \ No newline at end of file + + + + + ide|id + + + + + uronamide|uronamid + uronate|uronat + uronic acid + uronicacid + urononitrile|urononitril + onamide|onamid + onate|onat + onic acid|onicacid + ononitrile|ononitril + + + + + ose|os + + + + + arate|arat|arate(2-)|arat(2-) + aric acid|aricacid + onamide|onamid + onate|onat + onic acid|onicacid + ononitrile|ononitril + osonamide|osonamid + osonate|osonat + osonic acid|osonicacid + osononitrile|osononitril + uronamide|uronamid + uronate|uronat + uronic acid + uronicacid + urononitrile|urononitril + + + + + + on + uron + + + + + ar + on + oson + uron + + + + + odiald + + + + + itol + + + + + ul + + + + + ul + + + + + yl + + + + + onoyl + uronoyl + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carbohydrates.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carbohydrates.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carbohydrates.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carbohydrates.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,66 +1,65 @@ - - glycer + glycer - erythr - thre + erythr + thre - rib - arabin - xyl - lyx - - all - altr - gluc - mann - gul - id - galact - tal + rib + arabin + xyl + lyx + + all + altr + gluc + mann + gul + id + galact + tal - abequ - amicet - ascaryl - boivin - colit - digitox - fuc - parat - quinov - rhamn - rhodin - tyvel + abequ + amicet + ascaryl + boivin + colit + digitox + fuc + parat + quinov + rhamn + rhodin + tyvel - hamamel - cladin - strept - eval - evernitr - api + hamamel + cladin + strept + eval + evernitr + api - erythrul + erythrul - ribul - xylul + ribul + xylul - psic - fruct - sorb - tagat + psic + fruct + sorb + tagat - sedoheptul + sedoheptul @@ -96,91 +95,106 @@ - tri - tetr - pent - hex - hept - oct - non - dec - undec - dodec - tridec - tetradec - 
pentadec - hexadec + tri + tetr + pent + hex + hept + oct + non + dec + undec + dodec + tridec + tetradec + pentadec + hexadec - - dithioerythritol - dithiothreitol - galactal - glucal - sorbitol - rhamnulose - fuculose - - glucamine - saccharic acid - saccharate - - galactosamine - glucosamine - mannosamine - fucosamine - quinovosamine - bacillosamine - garosamine - - neuraminic acid - alpha-neuraminic acid - beta-neuraminic acid - neuraminate - alpha-neuraminate - beta-neuraminate - neuraminamide - alpha-neuraminamide - beta-neuraminamide - neuraminol - alpha-neuraminol - beta-neuraminol - - muramic acid - alpha-muramic acid - beta-muramic acid - isomuramic acid - alpha-isomuramic acid - beta-isomuramic acid + + dithioerythritol + dithiothreitol + sorbitol + rhamnulose + fuculose + glucamine|glucamin + meglumine|meglumin + saccharic acid + saccharate|saccharat + + + + + galactal + glucal + + ascorbic acid + dehydroascorbic acid + ascorbate|ascorbat + + galactosamine|galactosamin + glucosamine|glucosamin + mannosamine|mannosamin + fucosamine|fucosamin + quinovosamine|quinovosamin + bacillosamine|bacillosamin + garosamine|garosamin + + neuraminic acid + neuraminate|neuraminat + neuraminamide|neuraminamid + neuraminol + + muramic acid + isomuramic acid + + + lactose + lactosamine|lactosamin + lactosediamine|lactosediamin - glyceraldehyde - d-glyceraldehyde - (d)-glyceraldehyde - l-glyceraldehyde - (l)-glyceraldehyde + glyceraldehyde|glyceraldehyd + d-glyceraldehyde|(d)-glyceraldehyde|d-glyceraldehyd|(d)-glyceraldehyd + l-glyceraldehyde|(l)-glyceraldehyde|l-glyceraldehyd|(l)-glyceraldehyd + dextrose + + - apio-alpha-d-furanose - d-apio-alpha-d-furanose - apio-beta-d-furanose - d-apio-beta-d-furanose - apio-alpha-l-furanose - d-apio-alpha-l-furanose - apio-beta-l-furanose - d-apio-beta-l-furanose - l-apio-alpha-d-furanose - l-apio-beta-d-furanose - l-apio-alpha-l-furanose - l-apio-beta-l-furanose - - dextrose + apio-alpha-d-furanose|d-apio-alpha-d-furanose + 
apio-beta-d-furanose|d-apio-beta-d-furanose + apio-alpha-l-furanose|d-apio-alpha-l-furanose + apio-beta-l-furanose|d-apio-beta-l-furanose + l-apio-alpha-d-furanose + l-apio-beta-d-furanose + l-apio-alpha-l-furanose + l-apio-beta-l-furanose + + + + + galactosaminyl + glucosaminyl + mannosaminyl + fucosaminyl + quinovosaminyl + bacillosaminyl + garosaminyl + + neuraminyl|neuraminosyl + muramyl|muramosyl + isomuramyl|isomuramosyl + + lactosyl + lactosaminyl + lactosediaminyl - aldehydo + aldehydo - \ No newline at end of file + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carboxylicAcids.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carboxylicAcids.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carboxylicAcids.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/carboxylicAcids.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,289 +1,328 @@ - - + - - - form - acet - propion - propi - butyr - isobutyr - valer - isovaler - oxal - malon - succin - glutar + + + form + acet + propion|propi + butyr + isobutyr + valer + isovaler + oxal + malon + succin + glutar - acetoacet - caprin - capro - capryl - enanth - fol - dihydrofol - 7,8-dihydrofol - tetrahydrofol - 5,6,7,8-tetrahydrofol - 5,10-methenyl-5,6,7,8-tetrahydrofol - 5,10-methenyltetrahydrofol - 5,10-methenyl-tetrahydrofol - isocapro - isosuccin - mesoxal - naphthion - oxalacet - pelargon - pelarg - dimercaptosuccin - tetrol - + acetoacet + atrolact + biotin|d-biotin + caprin + capro + capryl + enanth + fol + dihydrofol|7,8-dihydrofol + tetrahydrofol|5,6,7,8-tetrahydrofol + 5,10-methenyl-5,6,7,8-tetrahydrofol|5,10-methenyltetrahydrofol|5,10-methenyl-tetrahydrofol + isocapro + isosuccin + lact|dl-lact + l-lact|l(+)-lact|l-(+)-lact + d-lact|d(-)-lact|d-(-)-lact + mesoxal + naphthion + orot + oxalacet + pelargon|pelarg + dimercaptosuccin + tetrol + - - 
- pival - adip - pimel - suber - azela - azel - sebac - acryl - methacryl - croton - isocroton - male - fumar - citracon - mesacon - camphor + + + pival + adip + pimel + suber + azela|azel + sebac + acryl + methacryl + croton + isocroton + male + fumar + citracon + mesacon + camphor + acetur + allophan + alpha-isoduryl + angel + benzil + beta-isoduryl + bicarbam + brassyl + citramal + cresot + o-cresot|ortho-cresot|o-cresotin|ortho-cresotin + m-cresot|meta-cresot|m-cresotin|meta-cresotin + p-cresot|para-cresot|p-cresotin|para-cresotin + gamma-isoduryl + glutacon + glycol + diglycol + glyoxyl|glyoxal + hippur + hydracryl + isonipecot + isocinchomeron + itacon + mal|dl-mal + l-mal|l(+)-mal|l-(+)-mal + d-mal|d(-)-mal|d-(-)-mal + malein + mandel|dl-mandel + l-mandel|l(+)-mandel|l-(+)-mandel + d-mandel|d(-)-mandel|d-(-)-mandel + thiomal + mucon + nipecot + orsellin|o-orsellin|ortho-orsellin + p-orsellin|para-orsellin + oxanil + d-pipecolin|d(+)-pipecolin|d-(+)-pipecolin|d-pipecol|d(+)-pipecol|d-(+)-pipecol + l-pipecolin|l(-)-pipecolin|l-(-)-pipecolin|l-pipecol|l(-)-pipecol|l-(-)-pipecol + pipecolin|pipecol + pyruv + sulfanil + thioglycol + alpha-resorcyl + beta-resorcyl + gamma-resorcyl + trimellit + trimes + - acetur - allophan - alpha-isoduryl - angel - benzil - beta-isoduryl - bicarbam - brassyl - citramal - cresot - o-cresot - ortho-cresot - o-cresotin - ortho-cresotin - m-cresot - meta-cresot - m-cresotin - meta-cresotin - p-cresot - para-cresot - p-cresotin - para-cresotin - gamma-isoduryl - glutacon - glycol - diglycol - glyoxyl - glyoxal - hippur - hydracryl - isonipecot - isocinchomeron - itacon - mal - thiomal - mucon - nipecot - oxanil - pipecolin - pipecol - pyruv - sulfanil - thioglycol - trimellit - trimes - + + + laur + myrist + palmit + stear + ole + elaid + benz + hydratrop + atrop + cinnam + nicotin + isonicotin + then - - - laur - myrist - palmit - stear - ole - elaid - benz - hydratrop - atrop - cinnam - nicotin - isonicotin - then - - anis - arachid 
- arachidyl - arach - arachidon - behen - behenol - brassid - cerebron - ceromeliss - ceroplast - cerot - citronell - conifer - clupanodon - eleostear - alpha-eleostear - beta-eleostear - eruc - farnes - gadole - gedd - gentis - geran - ghedd - glycer - glycid - gondo - homovanill - homoveratr - hydrocinnam - laccer - lact + anis + arachid|arachidyl|arach + arachidon + behen + behenol + brassid + caffe + cerebron + ceromeliss + ceroplast + cerot + citronell + coumar + conifer + clupanodon + eleostear + alpha-eleostear + beta-eleostear + eruc + ferul + farnes + gadole + gedd + gentis + geran + ghedd + glycer + glycid + gondo + homoanis + homogentis + homoisovanill + homoprotocatechu + homovanill + homoveratr + hydrocaffe + hydrocinnam + hydroferul + hydroisoferul + dihydrocaffe + dihydrocinnam + dihydroferul + dihydroisoferul + isoferul|iso-ferul + isostear|iso-stear + isovanill|iso-vanill + laccer - lignocer - linole - (9,12,15)-linolen - alpha-linolen - (6,9,12)-linolen - gamma-linolen - nervon - margar - meliss - montan - myristole - ner - palmitole - petroselin - petrosel - picol - protocatechu - psyll - punic - ricinole - ricinelaid - sinap - sorb - syring - vaccen - vanill - veratr - xyl - + lignocer + linole + (9,12,15)-linolen|alpha-linolen + (6,9,12)-linolen|gamma-linolen + nervon + margar + meliss + montan + myristole + ner + o-veratr|orthoveratr|ortho-veratr + o-homoveratr|orthohomoveratr|ortho-homoveratr + palmitole + penicillan + 6-aminopenicillan + petroselin|petrosel + picol + protocatechu + psyll + punic + ricinole + ricinelaid + sinap + sorb + syring + undecylen + vaccen + vanill + veratr + xyl + + + + + propiol + phthal + isophthal + terephthal + anthranil - - - propiol - phthal - isophthal - terephthal - anthranil + aconit + allant + asaron + asclepin + carbaz + cinchonin + citr + duryl + edet + gall + hemellit + hemimellit + henatriacontyl + heneicosyl + heptacosyl + heptadecyl + hexatriacontyl + homolevulin + homophthal + homoisophthal + 
homoterephthal + hydant + hydrangel + isocitr + isophthalon + itamal + itatartar + levulin + lutidin + m-hemipin + metahemipin + meta-hemipin + m-homosalicyl + meta-homosalicyl + maleur + melilot + mellit + + mellophan + mevalon + mevald + mucobrom + mucochlor + nonadecyl + nonacosyl + o-homosalicyl + ortho-homosalicyl + oxam + oxaldehyd + oxalur + p-homosalicyl + para-homosalicyl + pant + pentadecyl + pentacosyl + phloret + phthalon + piperonyl + prehnit + prehnityl + pristan + propargyl + pter + dihydropter|7,8-dihydropter + 5,6,7,8-tetrahydropter + 2-pyrocatechu + o-pyrocatechu + pyromellit + pyrotartar + quinald + salicyl + thiosalicyl + seneci + stearidon + tartar + dl-tartar + l-tartar + l(+)-tartar + l-(+)-tartar + dextrotartar + d-tartar + d(-)-tartar + d-(-)-tartar + levotartar + mesotartar + tartron + terephthalon + thaps + tigl + traumat + tricarballyl + tricosyl + tridecyl + trop + undecyl + umbell + uvit + valpr + vanilmandel + vanillomandel + vanillylmandel + - aconit - allant - asclepin - carbaz - cinchonin - citr - duryl - edet - gall - hemellit - hemimellit - henatriacontyl - heneicosyl - heptacosyl - heptadecyl - hexatriacontyl - homolevulin - hydant - hydrangel - isocitr - itamal - itatartar - levulin - m-homosalicyl - meta-homosalicyl - maleur - mellit - - mellophan - mevalon - mevald - mucobrom - mucochlor - nonadecyl - nonacosyl - o-homosalicyl - ortho-homosalicyl - oxam - oxaldehyd - oxalur - p-homosalicyl - para-homosalicyl - pant - pentadecyl - pentacosyl - piperonyl - prehnit - prehnityl - pristan - pter - dihydropter - 7,8-dihydropter - 5,6,7,8-tetrahydropter - pyromellit - pyrotartar - salicyl - thiosalicyl - seneci - stearidon - tartar - l-tartar - l(+)-tartar - l-(+)-tartar - dextrotartar - d-tartar - d(-)-tartar - d-(-)-tartar - levotartar - mesotartar - tartron - thaps - tigl - traumat - tricarballyl - tricosyl - tridecyl - trop - undecyl - uvit - valpr - + + + + leuc|dl-leuc + d-leuc + l-leuc - - - leuc - dl-leuc - d-leuc - l-leuc - 
- \ No newline at end of file + + n-butyr + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/chargeAndOxidationNumberSpecifiers.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/chargeAndOxidationNumberSpecifiers.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/chargeAndOxidationNumberSpecifiers.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/chargeAndOxidationNumberSpecifiers.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,53 +1,53 @@ - - - - (1+) - (2+) - (3+) - (4+) - (5+) - (6+) - (7+) - (8+) - (9+) - (+1) - (+2) - (+3) - (+4) - (+5) - (+6) - (+7) - (+8) - (+9) - (-1) - (-2) - (-3) - (-4) - (-5) - (-6) - (-7) - (-8) - (-9) - (1-) - (2-) - (3-) - (4-) - (5-) - (6-) - (7-) - (8-) - (9-) - - - (0) - (i) - (ii) - (iii) - (iv) - (v) - (vi) - (vii) - (viii) - (ix) - - \ No newline at end of file + + + + (1+) + (2+) + (3+) + (4+) + (5+) + (6+) + (7+) + (8+) + (9+) + (+1) + (+2) + (+3) + (+4) + (+5) + (+6) + (+7) + (+8) + (+9) + (-1) + (-2) + (-3) + (-4) + (-5) + (-6) + (-7) + (-8) + (-9) + (1-) + (2-) + (3-) + (4-) + (5-) + (6-) + (7-) + (8-) + (9-) + + + (0) + (i) + (ii) + (iii) + (iv) + (v) + (vi) + (vii) + (viii) + (ix) + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/cyclicUnsaturableHydrocarbon.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/cyclicUnsaturableHydrocarbon.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/cyclicUnsaturableHydrocarbon.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/cyclicUnsaturableHydrocarbon.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,32 +1,24 @@ - - + - - adamant - cub - prism - - - - - born - camph - bornyl - car - - menth - p-menth - para-menth - m-menth - meta-menth - o-menth - 
ortho-menth - norborn - norcamph - norbornyl - norcar - norpin - pin - thuj - - \ No newline at end of file + + adamant + cub + prism + + + + + born|camph|bornyl + car + + menth|p-menth|para-menth + m-menth|meta-menth + o-menth|ortho-menth + norborn|norcamph|norbornyl + norcar + norpin + pin + thuj + + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/elementaryAtoms.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/elementaryAtoms.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/elementaryAtoms.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/elementaryAtoms.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,136 +1,129 @@ - - + - lithium - sodium - natrium - potassium - kalium - rubidium - caesium - cesium - francium - - beryllium - magnesium - calcium - strontium - barium - radium - - aluminium - aluminum - gallium - indium - thallium - - tin - stannum - lead - plumbum + lithium + sodium|natrium + potassium|kalium + rubidium + caesium|cesium + francium + + beryllium + magnesium + calcium + strontium + barium + radium + + aluminium|aluminum + gallium + indium + thallium + + tin + stannum + lead|plumbum - bismuth + bismuth - polonium + polonium - scandium - titanium - vanadium - chromium - manganese - iron - cobalt - nickel - copper - zinc - - yttrium - zirconium - niobium - molybdenum - technetium - ruthenium - rhodium - palladium - silver - - cadmium - - lanthanum - cerium - praseodymium - neodymium - promethium - samarium - europium - gadolinium - terbium - dysprosium - holmium - erbium - thulium - ytterbium - lutetium - hafnium - tantalum - tungsten - - rhenium - osmium - iridium - platinum - gold - - mercury - hydrargyrum - - actinium - thorium - protactinium - uranium - neptunium - plutonium - americium - curium - berkelium - californium - einsteinium - fermium - mendelevium - nobelium - 
lawrencium - rutherfordium + scandium + titanium + vanadium + chromium + manganese + iron + cobalt + nickel + copper + zinc + + yttrium + zirconium + niobium + molybdenum + technetium + ruthenium + rhodium + palladium + silver + + cadmium + + lanthanum + cerium + praseodymium + neodymium + promethium + samarium + europium + gadolinium + terbium + dysprosium + holmium + erbium + thulium + ytterbium + lutetium + hafnium + tantalum + tungsten + + rhenium + osmium + iridium + platinum + gold + + mercury|hydrargyrum + + actinium + thorium + protactinium + uranium + neptunium + plutonium + americium + curium + berkelium + californium + einsteinium + fermium + mendelevium + nobelium + lawrencium + rutherfordium - boron + boron - carbon - silicon - germanium - - nitrogen - phosphorus - arsenic - antimony - stibium - - oxygen - sulfur - selenium - tellurium - polonium - - fluorine - chlorine - bromine - iodine - astatine - - helium - neon - argon - krypton - xenon - radon + carbon + silicon + germanium + + nitrogen + phosphorus + arsenic + antimony|stibium + + oxygen + sulfur + selenium + tellurium + + fluorine + chlorine + bromine + iodine + astatine + + helium + neon + argon + krypton + xenon + radon + actinon + thoron diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/functionalTerms.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/functionalTerms.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/functionalTerms.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/functionalTerms.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,163 +1,167 @@ - - - ester - glycol - fluorohydrin - chlorohydrin - bromohydrin - iodohydrin - cyanohydrin - - - - - oxime - thioxime - selenoxime - telluroxime - hydrazone - semicarbazone - thiosemicarbazone - selenosemicarbazone - tellurosemicarbazone - isosemicarbazone - 
isothiosemicarbazone - isoselenosemicarbazone - isotellurosemicarbazone - semioxamazone - - imide - imine - - oxide - monoxide - sulfide - selenide - telluride - - - - amide - azetidide - hydrazide - morpholide - piperazide - piperidide - pyrrolidide - - - - acetal - ketal - hemiacetal - hemiketal - - - - anhydride - thioanhydride - selenoanhydride - telluroanhydride - peroxyanhydride - dithioperoxyanhydride - diselenoperoxyanhydride - ditelluroperoxyanhydride - - - - oligo - poly - cyclo - + + ester + glycol + fluorohydrin + chlorohydrin + bromohydrin + iodohydrin + cyanohydrin + + + salt + + + + + oxime|oxim + thioxime|thioxim + selenoxime|selenoxim + telluroxime|telluroxim + hydrazone|hydrazon + semicarbazone|semicarbazon + thiosemicarbazone|thiosemicarbazon + selenosemicarbazone|selenosemicarbazon + tellurosemicarbazone|tellurosemicarbazon + isosemicarbazone|isosemicarbazon + isothiosemicarbazone|isothiosemicarbazon + isoselenosemicarbazone|isoselenosemicarbazon + isotellurosemicarbazone|isotellurosemicarbazon + semioxamazone|semioxamazon + imide|imid + imine|imin + + + oxide|oxid + sulfide|sulfid + selenide|selenid + telluride|tellurid + + + + amide|amid + azetidide|azetidid + hydrazide|hydrazid + morpholide|morpholid + piperazide|piperazid + piperidide|piperidid + pyrrolidide|pyrrolidid + + + + mercaptal + acetal|ketal + hemimercaptal + hemiacetal|hemiketal + hemithioacetal|hemithioketal + hemidithioacetal|hemidithioketal + + + + anhydride|anhydrid + thioanhydride|thioanhydrid + selenoanhydride|selenoanhydrid + telluroanhydride|telluroanhydrid + peroxyanhydride|peroxyanhydrid + dithioperoxyanhydride|dithioperoxyanhydrid + diselenoperoxyanhydride|diselenoperoxyanhydrid + ditelluroperoxyanhydride|ditelluroperoxyanhydrid + + + + oligo + poly + cyclo + - - alcohol - alcoholate - mercaptan - selenol - thiol - - - - ether - ketone - diketone - triketone - oxide - peroxide - selenide - diselenide - triselenide - selenone - diselenone - selenoxide - diselenoxide - 
selenoether - selenoketone - sulfide - disulfide - trisulfide - tetrasulfide - pentasulfide - hexasulfide - sulfone - disulfone - sulfoxide - disulfoxide - telluride - ditelluride - tritelluride - telluroether - telluroketone - tellurone - ditellurone - telluroxide - ditelluroxide - thioether - thioketone - - - - azide - bromide - chloride - cyanate - cyanide - deuteride - fluoride - fulminate - hydride - hydroperoxide - hydroselenide - hydrodiselenide - hydrotriselenide - hydrosulfide - hydrodisulfide - hydrotrisulfide - hydrotetrasulfide - hydrotelluride - hydroditelluride - hydrotritelluride - iodide - isocyanate - isocyanide - isofulminate - isonitrile - isoselenocyanate - isotellurocyanate - isothiocyanate - selenocyanate - selenofulminate - tellurocyanate - tellurofulminate - thiocyanate - thiofulminate - + + alcohol + alcoholate + mercaptan + selenol + thiol + + + + ether + ketone|keton + diketone|diketon + triketone|triketon + ketoxime|ketoxim + oxide|oxid + peroxide|peroxid + selenide|selenid + diselenide|diselenid + triselenide|triselenid + selenone|selenon + diselenone|diselenon + selenoxide|selenoxid + diselenoxide|diselenoxid + selenoether + selenoketone|selenoketon + sulfide|sulfid + disulfide|disulfid + trisulfide|trisulfid + tetrasulfide|tetrasulfid + pentasulfide|pentasulfid + hexasulfide|hexasulfid + sulfone|sulfon + disulfone|disulfon + sulfoxide|sulfoxid + disulfoxide|disulfoxid + telluride|tellurid + ditelluride|ditellurid + tritelluride|tritellurid + telluroether + telluroketone|telluroketon + tellurone|telluron + ditellurone|ditelluron + telluroxide|telluroxid + ditelluroxide|ditelluroxid + thioether + thioketone|thioketon + + + + azide|azid + bromide|bromid + chloride|chlorid + cyanate|cyanat + cyanide|cyanid + deuteride|deuterid + fluoride|fluorid + fulminate|fulminat + hydride|hydrid + hydroperoxide|hydroperoxid + hydroselenide|hydroselenid + hydrodiselenide|hydrodiselenid + hydrotriselenide|hydrotriselenid + hydrosulfide|hydrosulfid + 
hydrodisulfide|hydrodisulfid + hydrotrisulfide|hydrotrisulfid + hydrotetrasulfide|hydrotetrasulfid + hydrotelluride|hydrotellurid + hydroditelluride|hydroditellurid + hydrotritelluride|hydrotritellurid + iodide|iodid + isocyanate|isocyanat + isocyanide|isocyanid + isofulminate|isofulminat + isonitrile|isonitril + isoselenocyanate|isoselenocyanat + isotellurocyanate|isotellurocyanat + isothiocyanate|isothiocyanat + selenocyanate|selenocyanat + selenofulminate|selenofulminat + tellurocyanate|tellurocyanat + tellurofulminate|tellurofulminat + thiocyanate|thiocyanat + thiofulminate|thiofulminat + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/fusionComponents.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/fusionComponents.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/fusionComponents.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/fusionComponents.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,178 +1,158 @@ - - - - - - - benzo - benz - - - - naphthyridino - phenanthrolino - - - aceanthryleno - acenaphthyleno - acenaphth - acenaphtho - acephenanthryleno - acridarsino - acridino - acridophosphino - acrindolino - anthyridino - anthraceno - anthra - anthrazino - arsanthreno - arsanthridino - arsindolizino - arsindolo - arsinolino - arsinolizino - azuleno - benzeno - betacarbolino - beta-carbolino - b-carbolino - biphenyleno - boranthreno - carbazolo - chromeno - thiochromeno - selenochromeno - tellurochromeno - chryseno - cinnolino - coroneno - fluorantheno - fluoreno - furano - furo - imidazolo - imidazo - as-indaceno - s-indaceno - indazolo - indeno - indolo - indolizino - isoarsindolo - isoarsinolino - isobenzofurano - isobenzofuro - isochromeno - isothiochromeno - isoselenochromeno - isotellurochromeno - isoindolo - isooxazolo - isoxazolo - isophosphindolo - isophosphinolino - isoquino - 
isoquinolino - isoselenazolo - isotellurazolo - isothiazolo - mercuranthreno - naphthaceno - naphthaleno - naphth - naphtho - ovaleno - oxanthreno - perimidino - peryleno - perylo - phenaleno - phenanthrazino - phenanthreno - phenanthridino - phenanthro - phenazino - phenoxazino - phenothiazino - phenoselenazino - phenotellurazino - phenophosphazinino - phenophosphazino - phenarsazinino - phenazasilino - phenoarsazino - phenomercurazino - phenomercazino - phenoxathiino - phenoxaselenino - phenoxasilin - phenoxatellurino - phenoxaphosphinino - phenoxaphosphino - phenoxarsinino - phenoxarsino - phenoxastibinino - phenoxantimonino - phenothiarsinino - phenothiarsino - phenomercurino - phosphanthreno - phosphanthridino - phosphindolo - phosphindolizino - phosphinolino - phosphinolizino - phthalazino - phthaloperino - piceno - pleiadeno - pteridino - purino - pyrano - thiopyrano - selenopyrano - telluropyrano - pyranthreno - pyrazino - pyrazolo - pyreno - pyridazino - pyridino - pyrido - pyrimidino - pyrimido - pyrrolizino - pyrrolo - quinazolino - quindolino - quinindolino - quino - quinolino - quinolizino - quinoxalino - rubiceno - selenanthreno - selenopheno - silanthreno - s-triazino - sym-triazino - s-triazolo - sym-triazolo - telluranthreno - telluropheno - thebenidino - thianthreno - thieno - thiopheno - trindeno - trinaphthyleno - triphenodioxazino - triphenodithiazino - triphenyleno - xantheno - thioxantheno - selenoxantheno - telluroxantheno - - \ No newline at end of file + + + + + + benzo|benz + + + + naphthyridino + phenanthrolino + + + aceanthryleno + acenaphthyleno|acenaphth|acenaphtho + acephenanthryleno + acridarsino + acridino + acridophosphino + acrindolino + anthyridino + anthraceno|anthra + anthrazino + arsanthreno + arsanthridino + arsindolizino + arsindolo + arsinolino + arsinolizino + azuleno + benzeno + betacarbolino|beta-carbolino|b-carbolino + biphenyleno + boranthreno + carbazolo + chinolino + chromeno + thiochromeno + selenochromeno + 
tellurochromeno + chryseno + cinnolino + coroneno + fluorantheno + fluoreno + furano + furazano + furo + imidazolo|imidazo + as-indaceno + s-indaceno + indazolo + indeno + indolizidino + indolizino + indolo + isoarsindolo + isoarsinolino + isobenzofurano|isobenzofuro + isochinolino + isochromeno + isothiochromeno + isoselenochromeno + isotellurochromeno + isoindolo + isooxazolo|isoxazolo + isophosphindolo + isophosphinolino + isoquino|isoquinolino + isoselenazolo + isotellurazolo + isothiazolo + mercuranthreno + naphthaceno + naphthaleno|naphthalino|naphth|naphtho + ovaleno + oxanthreno + perimidino + peryleno|perylo + phenaleno + phenanthrazino + phenanthreno + phenanthridino + phenanthro + phenazino + phenoxazino + phenothiazino + phenoselenazino + phenotellurazino + phenophosphazinino|phenophosphazino + phenarsazinino + phenazasilino + phenoarsazino + phenomercurazino|phenomercazino + phenoxathiino + phenoxaselenino + phenoxasilin + phenoxatellurino + phenoxaphosphinino|phenoxaphosphino + phenoxarsinino|phenoxarsino + phenoxastibinino|phenoxantimonino + phenothiarsinino|phenothiarsino + phenomercurino + phosphanthreno + phosphanthridino + phosphindolo + phosphindolizino + phosphinolino + phosphinolizino + phthalazino + phthaloperino + piceno + pleiadeno + pteridino + purino + pyrano + thiopyrano + selenopyrano + telluropyrano + pyranthreno + pyrazino + pyrazolo + pyreno + pyridazino + pyridino|pyrido + pyrimidino|pyrimido + pyrrolizidino + pyrrolizino + pyrrolo + quinazolino + quindolino + quinindolino + quino|quinolino + quinolizidino + quinolizino + quinoxalino + rubiceno + selenanthreno + selenopheno + silanthreno + s-triazino|sym-triazino + s-triazolo|sym-triazolo + telluranthreno + telluropheno + thebenidino + thianthreno + thieno|thiopheno + trindeno + trinaphthyleno + triphenodioxazino + triphenodithiazino + triphenyleno + xantheno + thioxantheno + selenoxantheno + telluroxantheno + + diff -Nru 
opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/germanTokens.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/germanTokens.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/germanTokens.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/germanTokens.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,19 +1,18 @@ - - brom - chlor - fluor - iod + brom + chlor + fluor + iod - perfluor - perbrom - perchlor - period + perfluor + perbrom + perchlor + period diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/groupStemsAllowingAllSuffixes.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/groupStemsAllowingAllSuffixes.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/groupStemsAllowingAllSuffixes.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/groupStemsAllowingAllSuffixes.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,10 +1,9 @@ - - - - - hydrazin - acetylen - - hydroxylamin - phytan - + + + + hydrazin + acetylen + + hydroxylamin + phytan + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/groupStemsAllowingInlineSuffixes.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/groupStemsAllowingInlineSuffixes.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/groupStemsAllowingInlineSuffixes.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/groupStemsAllowingInlineSuffixes.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,65 +1,63 @@ - - - - - - allen - formazan - isodiazen - keten - urethan - - isopren - - - ammonium - phosphonium - arsonium - stibonium - bismuthonium - oxonium - sulfonium - 
selenonium - telluronium - fluoronium - chloronium - bromonium - iodonium - - - silylium - germylium - stannylium - plumbylium - - - phosphine - arsine - stibin - bismuthin - - - aceton - propionon - butyron - valeron - enanthon - caprylon - - isobutyron - isovaleron - - lauron - myriston - palmiton - stearon - - - glutathion - glycerol - glycerin - sn-glycerol - guanidin - saccharin - - \ No newline at end of file + + + + + allen + formazan + isodiazen + keten + urethan + + isopren + + + ammonium + phosphonium + arsonium + stibonium + bismuthonium + oxonium + sulfonium + selenonium + telluronium + fluoronium + chloronium + bromonium + iodonium + + + silylium + germylium + stannylium + plumbylium + + + phosphine + arsine + stibin + bismuthin + + + aceton + propionon + butyron + valeron + enanthon + caprylon + + isobutyron + isovaleron + + lauron + myriston + palmiton + stearon + + + glutathion + glycerol|glycerin + sn-glycerol + guanidin + saccharin + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/heteroAtoms.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/heteroAtoms.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/heteroAtoms.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/heteroAtoms.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,304 +1,302 @@ - - - - - fluora - chlora - broma - ioda - astata - oxa - thia - selena - tellura - polona - aza - phospha - arsa - stiba - bisma - carba - sila - germa - stanna - plumba - bora - alumina - galla - inda - thalla - zinca - cadma - mercura - cupra - argenta - aura - nickela - pallada - platina - darmstadta - cobalta - rhoda - irida - meitnera - ferra - ruthena - osma - hassa - mangana - techneta - rhena - bohra - chroma - molybda - tungsta - seaborga - vanada - nioba - tantala - dubna - titana - zircona - hafna - rutherforda - scanda - yttra - 
lanthana - cera - praseodyma - neodyma - prometha - samara - europa - gadolina - terba - dysprosa - holma - erba - thula - ytterba - luteta - actina - thora - protactina - urana - neptuna - plutona - america - cura - berkela - californa - einsteina - ferma - mendeleva - nobela - lawrenca - berylla - magnesa - calca - stronta - bara - rada - litha - soda - potassa - rubida - caesa - franca - hela - neona - argona - kryptona - xenona - radona - - fluoronia - chloronia - bromonia - iodonia - astatonia - oxonia - thionia - selenonia - telluronia - polononia - azonia - phosphonia - arsonia - stibonia - bismuthonia - carbonia - silonia - germonia - stannonia - plumbonia - boronia - aluminonia - gallonia - indonia - thallonia - zinconia - cadmonia - mercuronia - cupronia - argentonia - auronia - nickelonia - palladonia - platinonia - darmstadtonia - cobaltonia - rhodonia - iridonia - meitneronia - ferronia - ruthenonia - osmonia - hassonia - manganonia - technetonia - rhenonia - bohronia - chromonia - molybdonia - tungstonia - seaborgonia - vanadonia - niobonia - tantalonia - dubnonia - titanonia - zircononia - hafnonia - rutherfordonia - scandonia - yttronia - lanthanonia - ceronia - praseodymonia - neodymonia - promethonia - samaronia - europonia - gadolinonia - terbonia - dysprosonia - holmonia - erbonia - thulonia - ytterbonia - lutetonia - actinonia - thoronia - protactinonia - uranonia - neptunonia - plutononia - americonia - curonia - berkelonia - californonia - einsteinonia - fermonia - mendelevonia - nobelonia - lawrenconia - beryllonia - magnesonia - calconia - strontonia - baronia - radonia - lithonia - sodonia - potassonia - rubidonia - caesonia - franconia - helonia - neononia - argononia - kryptononia - xenononia - radononia - - fluoranylia - chloranylia - bromanylia - iodanylia - astatanylia - oxidanylia - sulfanylia - selanylia - tellanylia - polanylia - azanylia - phosphanylia - arsanylia - stibanylia - bismuthanylia - carbanylia - silanylia - germanylia 
- stannanylia - plumbanylia - boranylia - alumanylia - indiganylia - gallanylia - thallanylia - - fluoranida - chloranida - bromanida - iodanida - astatanida - oxidanida - sulfanida - selanida - tellanida - polanida - azanida - phosphanida - arsanida - stibanida - bismuthanida - carbanida - silanida - germanida - stannanida - plumbanida - boranida - alumanida - indiganida - gallanida - thallanida - - fluoranuida - chloranuida - bromanuida - iodanuida - astatanuida - oxidanuida - sulfanuida - selanuida - tellanuida - polanuida - azanuida - phosphanuida - arsanuida - stibanuida - bismuthanuida - carbanuida - silanuida - germanuida - stannanuida - plumbanuida - boranuida - alumanuida - indiganuida - gallanuida - thallanuida - - - + + + + + fluora + chlora + broma + ioda + astata + oxa + thia + selena + tellura + polona + aza + phospha + arsa + stiba + bisma + carba + sila + germa + stanna + plumba + bora + alumina + galla + inda + thalla + zinca + cadma + mercura + cupra + argenta + aura + nickela + pallada + platina + darmstadta + cobalta + rhoda + irida + meitnera + ferra + ruthena + osma + hassa + mangana + techneta + rhena + bohra + chroma + molybda + tungsta + seaborga + vanada + nioba + tantala + dubna + titana + zircona + hafna + rutherforda + scanda + yttra + lanthana + cera + praseodyma + neodyma + prometha + samara + europa + gadolina + terba + dysprosa + holma + erba + thula + ytterba + luteta + actina + thora + protactina + urana + neptuna + plutona + america + cura + berkela + californa + einsteina + ferma + mendeleva + nobela + lawrenca + berylla + magnesa + calca + stronta + bara + rada + litha + soda + potassa + rubida + caesa + franca + hela + neona + argona + kryptona + xenona + radona + + fluoronia + chloronia + bromonia + iodonia + astatonia + oxonia + thionia + selenonia + telluronia + polononia + azonia + phosphonia + arsonia + stibonia + bismuthonia + carbonia + silonia + germonia + stannonia + plumbonia + boronia + aluminonia + gallonia + 
indonia + thallonia + zinconia + cadmonia + mercuronia + cupronia + argentonia + auronia + nickelonia + palladonia + platinonia + darmstadtonia + cobaltonia + rhodonia + iridonia + meitneronia + ferronia + ruthenonia + osmonia + hassonia + manganonia + technetonia + rhenonia + bohronia + chromonia + molybdonia + tungstonia + seaborgonia + vanadonia + niobonia + tantalonia + dubnonia + titanonia + zircononia + hafnonia + rutherfordonia + scandonia + yttronia + lanthanonia + ceronia + praseodymonia + neodymonia + promethonia + samaronia + europonia + gadolinonia + terbonia + dysprosonia + holmonia + erbonia + thulonia + ytterbonia + lutetonia + actinonia + thoronia + protactinonia + uranonia + neptunonia + plutononia + americonia + curonia + berkelonia + californonia + einsteinonia + fermonia + mendelevonia + nobelonia + lawrenconia + beryllonia + magnesonia + calconia + strontonia + baronia + radonia + lithonia + sodonia + potassonia + rubidonia + caesonia + franconia + helonia + neononia + argononia + kryptononia + xenononia + radononia + + fluoranylia + chloranylia + bromanylia + iodanylia + astatanylia + oxidanylia + sulfanylia + selanylia + tellanylia + polanylia + azanylia + phosphanylia + arsanylia + stibanylia + bismuthanylia + carbanylia + silanylia + germanylia + stannanylia + plumbanylia + boranylia + alumanylia + indiganylia + gallanylia + thallanylia + + fluoranida + chloranida + bromanida + iodanida + astatanida + oxidanida + sulfanida + selanida + tellanida + polanida + azanida + phosphanida + arsanida + stibanida + bismuthanida + carbanida + silanida + germanida + stannanida + plumbanida + boranida + alumanida + indiganida + gallanida + thallanida + + fluoranuida + chloranuida + bromanuida + iodanuida + astatanuida + oxidanuida + sulfanuida + selanuida + tellanuida + polanuida + azanuida + phosphanuida + arsanuida + stibanuida + bismuthanuida + carbanuida + silanuida + germanuida + stannanuida + plumbanuida + boranuida + alumanuida + indiganuida + 
gallanuida + thallanuida + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/hwHeteroAtoms.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/hwHeteroAtoms.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/hwHeteroAtoms.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/hwHeteroAtoms.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,106 +1,136 @@ - - - - - - fluora - chlora - broma - ioda - oxa - thia - selena - tellura - aza - phospha - arsa - stiba - bisma - sila - germa - stanna - plumba - bora - aluma - galla - indiga - thalla - mercura - - fluor - chlor - brom - iod - ox - thi - selen - tellur - az - phosph - ars - stib - bism - sil - germ - stann - plumb - bor - alum - gall - indig - thall - mercur - - phosphor - arsen - antimon - - - - oxa - thia - selena - tellura - aza - bisma - sila - germa - stanna - plumba - mercura - - ox - thi - selen - tellur - az - bism - sil - germ - stann - plumb - mercur - - phosphor - arsen - antimon - - - - oxa - thia - selena - tellura - bisma - mercura - - ox - thi - selen - tellur - bism - mercur - - arsen - - + + + + + fluora + chlora + broma + ioda + oxa + thia + selena + tellura + aza + phospha + arsa + stiba + bisma + sila + germa + stanna + plumba + bora + aluma + galla + indiga + thalla + mercura + + fluor + chlor + brom + iod + ox + thi + selen + tellur + az + phosph + ars + stib + bism + sil + germ + stann + plumb + bor + alum + gall + indig + thall + mercur + + phosphor + arsen + antimon + + + + oxa + thia + selena + tellura + aza + bisma + sila + germa + stanna + plumba + mercura + + ox + thi + selen + tellur + az + bism + sil + germ + stann + plumb + mercur + + phosphor + arsen + antimon + + + + oxa + thia + selena + tellura + bisma + mercura + + ox + thi + selen + tellur + bism + mercur + + arsen + + + + + + aluma + indiga + + fluor + chlor + brom + 
iod + ox + thi + selen + tellur + az + phosph + ars + stib + bism + sil + germ + stann + plumb + bor + alum + gall + indig + thall + mercur + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/hwSuffixes.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/hwSuffixes.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/hwSuffixes.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/hwSuffixes.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,80 +1,71 @@ - - - - - - iren - iran - irin - iridin - - et - etan - etidin - etin - eten - - ol - olan - olidin - olid - olin - olen - - inan - inin - - epin - epan - - ocin - ocan - - onin - onan - - ecin - ecan - - - cycloundecin - cyclododecin - cyclotridecin - cyclotetradecin - cyclopentadecin - cyclohexadecin - cycloheptadecin - cyclooctadecin - cyclononadecin - cycloeicosin - cycloicosin - cyclohenicosin - cyclodocosin - cyclotricosin - cyclotetracosin - cyclopentacosin - cyclohexacosin - cycloheptacosin - cyclooctacosin - cyclononacosin - cyclotriacontin - cyclohentriacontin - cyclodotriacontin - cyclotritriacontin - cyclotetratriacontin - cyclopentatriacontin - cyclohexatriacontin - cycloheptatriacontin - cyclooctatriacontin - cyclononatriacontin - cyclotetracontin - - - - in - - - - an - - \ No newline at end of file + + + + + iren|irin + iran|iridin + + et + etan|etidin + etin|eten + + ol + olan|olidin + olin|olen + + inan + inin + + epin + epan + + ocin + ocan + + onin + onan + + ecin + ecan + + + cycloundecin + cyclododecin + cyclotridecin + cyclotetradecin + cyclopentadecin + cyclohexadecin + cycloheptadecin + cyclooctadecin + cyclononadecin + cycloeicosin|cycloicosin + cyclohenicosin + cyclodocosin + cyclotricosin + cyclotetracosin + cyclopentacosin + cyclohexacosin + cycloheptacosin + cyclooctacosin + cyclononacosin + cyclotriacontin + cyclohentriacontin + 
cyclodotriacontin + cyclotritriacontin + cyclotetratriacontin + cyclopentatriacontin + cyclohexatriacontin + cycloheptatriacontin + cyclooctatriacontin + cyclononatriacontin + cyclotetracontin + + + + in + + + + an + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/index.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/index.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/index.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/index.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,41 +1,41 @@ - + - - arylSubstituents.xml - multiRadicalSubstituents.xml - simpleSubstituents.xml - substituents.xml - - arylGroups.xml - simpleGroups.xml - simpleCyclicGroups.xml - groupStemsAllowingAllSuffixes.xml - groupStemsAllowingInlineSuffixes.xml - cyclicUnsaturableHydrocarbon.xml - elementaryAtoms.xml - aminoAcids.xml - carbohydrates.xml - naturalProducts.xml - - alkanes.xml - atomHydrides.xml - - carboxylicAcids.xml - nonCarboxylicAcids.xml - - chargeAndOxidationNumberSpecifiers.xml - functionalTerms.xml - heteroAtoms.xml - hwHeteroAtoms.xml - hwSuffixes.xml - fusionComponents.xml - multipliers.xml - infixes.xml - inlineSuffixes.xml - inlineChargeSuffixes.xml - suffixPrefix.xml - carbohydrateSuffixes.xml - suffixes.xml - unsaturators.xml - miscTokens.xml - + + arylSubstituents.xml + multiRadicalSubstituents.xml + simpleSubstituents.xml + substituents.xml + + arylGroups.xml + simpleGroups.xml + simpleCyclicGroups.xml + groupStemsAllowingAllSuffixes.xml + groupStemsAllowingInlineSuffixes.xml + cyclicUnsaturableHydrocarbon.xml + elementaryAtoms.xml + aminoAcids.xml + carbohydrates.xml + naturalProducts.xml + + alkanes.xml + atomHydrides.xml + + carboxylicAcids.xml + nonCarboxylicAcids.xml + + chargeAndOxidationNumberSpecifiers.xml + functionalTerms.xml + heteroAtoms.xml + hwHeteroAtoms.xml + hwSuffixes.xml + 
fusionComponents.xml + multipliers.xml + infixes.xml + inlineSuffixes.xml + inlineChargeSuffixes.xml + suffixPrefix.xml + carbohydrateSuffixes.xml + suffixes.xml + unsaturators.xml + miscTokens.xml + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/infixes.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/infixes.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/infixes.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/infixes.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,46 +1,43 @@ - - - - amid - azid - bromid - chlorid - cyanatid - cyanid - dithioperox - diselenoperox - ditelluroperox - fluorid - hydrazid - hydrazon - imid - iodid - isocyanatid - isocyanid - isothiocyanatid - isoselenocyanatid - isotellurocyanatid - nitrid - perox - selen - tellur - thi - thiocyanatid - selenocyanatid - tellurocyanatid - - - - - hydroxim - - \ No newline at end of file + + + + amid + azid + bromid + chlorid + cyanatid + cyanid + dithioperox + diselenoperox + ditelluroperox + fluorid + hydrazid + hydrazon + imid + iodid + isocyanatid + isocyanid + isothiocyanatid + isoselenocyanatid + isotellurocyanatid + nitrid + perox + selen + tellur + thi + thiocyanatid + selenocyanatid + tellurocyanatid + + + + + hydroxim + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/inlineChargeSuffixes.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/inlineChargeSuffixes.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/inlineChargeSuffixes.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/inlineChargeSuffixes.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,19 +1,15 @@ - - - - - - ium - ide - id - ylium - (ylium) - uide - uid - - - - ylium - (ylium) - - \ No newline 
at end of file + + + + + ium + ide|id + ylium|(ylium) + uide|uid + + + + + ylium|(ylium) + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/inlineSuffixes.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/inlineSuffixes.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/inlineSuffixes.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/inlineSuffixes.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,80 +1,70 @@ - - - - - - aldehydoyl - amido - amidyl - aminyl - amoyl - aniloyl - carbonyl - hydrazido - io - iminyl - oxy - oyl - selenenyl - seleninyl - selenonyl - sulfenamido - sulfenoselenoyl - sulfenothioyl - sulfenyl - sulfinyl - sulfonyl - tellurenyl - tellurinyl - telluronyl - yl - ylidene - ylidyne - yliden - ylidyn - - - - - imido - imidyl - - - - amido - oyl - yl - ylidene - ylidyne - yliden - ylidyn - - - - amido - amidyl - carbonyl - hydrazido - oxy - oyl - - - - amido - amidyl - hydrazido - oyl - - - - yl - ylidene - yliden - - - - - ylene - ylen - - \ No newline at end of file + + + + + aldehydoyl + amido|amidyl + aminyl + amoyl + aniloyl + carbonimidoyl + carbonyl + hydrazido + io + iminyl + oximino + oxy + oyl + selenenyl + seleninyl + selenonyl + sulfenamido + sulfenoselenoyl + sulfenothioyl + sulfenyl + sulfinyl + sulfonyl + tellurenyl + tellurinyl + telluronyl + yl + ylidene|yliden + ylidyne|ylidyn + + + + + imido|imidyl + + + + oyl + yl + ylidene|yliden + ylidyne|ylidyn + + + + amido|amidyl + carbonyl + hydrazido + oxy + oyl + + + + amido|amidyl + hydrazido + oyl + + + + yl + ylidene|yliden + + + + + ylene|ylen + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/miscTokens.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/miscTokens.xml --- 
opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/miscTokens.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/miscTokens.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,188 +1,199 @@ - - - - - cen - len - phen - phenylen - naphthylen - helicen - - - - cyclo - cyclo- - - - - hydro - - - perhydro - - - - cis - trans - - - - ine - in - - - - thio - seleno - telluro - - - - ylamine - - - - spirobi - spirobi- - spiroter - spiroter- - - - - spiro - spiro- - - - - spiro - spiro- - - - - d- - (d)- - l- - (l)- - dl- - (dl)- - - ds- - dg- - ls- - lg- - - - - - deoxy - desoxy - - - - dehydro - - - - anhydro - - - - - deoxy - desoxy - - - - dehydro - - - - anhydro - - - - - - - - - - - , - - - - ( - [ - { - - - - ) - ] - } - - - - ( - [ - { - - - - ) - ] - } - - - - ( - [ - { - - - - ) - ] - } - - - - a - - - - e - - - - o - - - - o - - - - , - - - - - - - - o - - - - - - - - - - - - - - \ No newline at end of file + + + + + cen + len + phen + phenylen + naphthylen + helicen + + + + cyclo|cyclo- + + + + hydro + + + perhydro + + + + cis + trans + + + + endo + exo + syn + anti + + + + r + + + + + ine|in + + + + thio + seleno + telluro + + + + ylamine|ylamin + + + + spirobi|spirobi- + spiroter|spiroter- + + + + spiro|spiro- + + + + spiro|spiro- + + + + d-|(d)- + l-|(l)- + dl-|d,l-|(dl)- + + ds- + dg- + ls- + lg- + + + + + deoxy + desoxy + deamino + desamino + demethyl + desmethyl + + + + dehydro + + + + anhydro + + + + + deoxy + desoxy + deamino + desamino + demethyl + desmethyl + + + + dehydro + + + + anhydro + + + + - + + + + - + , + + + + ( + [ + { + + + + ) + ] + } + + + + ( + [ + { + + + + ) + ] + } + + + + ( + [ + { + + + + ) + ] + } + + + + a + + + + e + + + + o + + + + o + + + + , + + + + + + + + o + + + + + + + + + + + + + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/multipliers.xml 
opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/multipliers.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/multipliers.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/multipliers.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,138 +1,144 @@ - - - - - - mono - - - - di - tri - - - - tetr - pent - hex - hept - oct - non - dec - undec - dodec - tridec - tetradec - pentadec - hexadec - heptadec - octadec - nonadec - eicos - icos - henicos - heneicos - docos - tricos - tetracos - pentacos - hexacos - heptacos - octacos - nonacos - triacont - hentriacont - dotriacont - tritriacont - tetratriacont - pentatriacont - hexatriacont - heptatriacont - octatriacont - nonatriacont - tetracont - hentetracont - dotetracont - tritetracont - tetratetracont - pentatetracont - hexatetracont - heptatetracont - octatetracont - nonatetracont - pentacont - - - - bis - tris - tetrakis - pentakis - hexakis - heptakis - octakis - nonakis - decakis - undecakis - dodecakis - tridecakis - tetradecakis - pentadecakis - hexadecakis - heptadecakis - octadecakis - nonadecakis - eicosakis - icosakis - henicosakis - heneicosakis - docosakis - tricosakis - tetracosakis - pentacosakis - hexacosakis - heptacosakis - octacosakis - nonacosakis - triacontakis - - - - bi - tri - tetra - penta - hexa - hepta - octa - nona - deca - undeca - dodeca - trideca - tetradeca - pentadeca - hexadeca - heptadeca - octadeca - nonadeca - eicosa - - - - bi - ter - quater - quinque - sexi - septi - octi - novi - deci - undeci - dodeci - trideci - tetradeci - pentadeci - - + + + + + mono + + + + mon + + + + di + tri + + + + tetr + pent + hex + hept + oct + non + dec + undec + dodec + tridec + tetradec + pentadec + hexadec + heptadec + octadec + nonadec + eicos|icos + henicos|heneicos + docos + tricos + tetracos + pentacos + hexacos + heptacos + octacos + nonacos + triacont + hentriacont + 
dotriacont + tritriacont + tetratriacont + pentatriacont + hexatriacont + heptatriacont + octatriacont + nonatriacont + tetracont + hentetracont + dotetracont + tritetracont + tetratetracont + pentatetracont + hexatetracont + heptatetracont + octatetracont + nonatetracont + pentacont + + + + bis + tris + tetrakis + pentakis + hexakis + heptakis + octakis + nonakis + decakis + undecakis + dodecakis + tridecakis + tetradecakis + pentadecakis + hexadecakis + heptadecakis + octadecakis + nonadecakis + eicosakis|icosakis + henicosakis|heneicosakis + docosakis + tricosakis + tetracosakis + pentacosakis + hexacosakis + heptacosakis + octacosakis + nonacosakis + triacontakis + + + + bi + tri + tetra + penta + hexa + hepta + octa + nona + deca + undeca + dodeca + trideca + tetradeca + pentadeca + hexadeca + heptadeca + octadeca + nonadeca + eicosa + + + + bi + ter + quater + quinque + sexi + septi + octi + novi + deci + undeci + dodeci + trideci + tetradeci + pentadeci + + + + hemi + sesqui + hemipenta + hemihepta + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/multiRadicalSubstituents.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/multiRadicalSubstituents.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/multiRadicalSubstituents.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/multiRadicalSubstituents.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,112 +1,94 @@ - - + + - - - methylene - methylen - ethylene - ethylen - propylene - propylen - butylene - butylen - pentylene - pentylen - hexylene - hexylen - heptylene - heptylen - octylene - octylen - nonylene - nonylen - undecylene - undecylen - - carbene - vinylene - neopentylene - - durylene - phenylene - phthalylidene - phthalal - isophthalylidene - isophthalal - terephthalylidene - terephthalal - tolylene - semicarbazono - siloxane - ureylene - 
xylylene - - hydrazo - azino - azo - azoxy - nno-azoxy - non-azoxy - onn-azoxy - diazoamino - nitrene - aminylene - imino - iminio - nitrilo - nitrilio - - phosphinidyne - arsinidyne - stibylidyne - bismuthylidyne - phosphinidenio - arsinidenio - phosphinico - arsinico - stibinico - - peroxy - - nitroryl - phosphinato - arsinato - stibinato - - silylene - germylene - stannylene - plumbylene - borylene - - oxy - thio - seleno - telluro - - - carbothioyl - carboselenoyl - carbotelluroyl - carbohydrazonoyl - carboimidoyl - carbohydroximoyl - - - epoxy - epithio - episeleno - epitelluro - - - epoxy - epithio - episeleno - epitelluro - - - amine - - - + + + methylene|methylen + ethylene|ethylen + propylene|propylen + butylene|butylen + pentylene|pentylen + hexylene|hexylen + heptylene|heptylen + octylene|octylen + nonylene|nonylen + undecylene|undecylen + + carbene + vinylene|vinylen + neopentylene|neopentylen + + durylene|durylen + phenylene|phenylen + phthalylidene|phthalyliden|phthalal + isophthalylidene|isophthalyliden|isophthalal + terephthalylidene|terephthalyliden|terephthalal + tolylene|tolylen + semicarbazono + siloxane|siloxan + ureylene|ureylen + xylylene|xylylen + + hydrazo + azino + azo + azoxy + nno-azoxy + non-azoxy + onn-azoxy + diazoamino + nitrene|aminylene|aminylen + imino + iminio + nitrilo + nitrilio + + phosphinidyne|phosphinidyn + arsinidyne|arsinidyn + stibylidyne|stibylidyn + bismuthylidyne|bismuthylidyn + phosphinidenio + arsinidenio + phosphinico + arsinico + stibinico + + peroxy + + nitroryl + phosphinato + arsinato + stibinato + + silylene|silylen + germylene|germylen + stannylene|stannylen + plumbylene|plumbylen + borylene|borylen + + oxy + + thio + seleno + telluro + + + carbothioyl + carboselenoyl + carbotelluroyl + carbohydrazonoyl + carboimidoyl + carbohydroximoyl + + + epoxy + epithio + episeleno + epitelluro + epiazano|epimino|epiimino + etheno + + + amine|amin + + diff -Nru 
opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/naturalProducts.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/naturalProducts.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/naturalProducts.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/naturalProducts.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,67 +1,77 @@ - - - - - - aconit - ajmal - morphin - trop - - androst - campest - chol - cholest - ergost - estr - oestr - furost - gon - gorgoste - poriferast - pregn - spirost - stigmast - - - prost - thrombox - - - - bufanolide - cardanolide - - - androstenol - androstenone - androstadienone - androstanediol - androstenediol - androstenedione - cholesterol - estratetraenol - morphine - - - - - aporphin - berbin - ergolin - - cepham - penam - - - - lysergic acid - isolysergic acid - lysergate - isolysergate - lysergamide - isolysergamide - lysergol - isolysergol - - \ No newline at end of file + + + + + + aconit + ajmal + cinchon + morphin + trop + + androst + campest + chol + cholest + ergost + estr|oestr + furost + gon + gorgost + poriferast + pregn + spirost + stigmast + + + prost + thrombox + + + + bufanolide|bufanolid + bufadienolide|bufadienolid + cardanolide|cardanolid + cardenolide|cardenolid + + + androstenol + androstenone|androstenon + androstadienone|androstadienon + androstanediol + androstenediol + androstenedione|androstenedion + campestanol + cholesterol + estratetraenol + morphine|morphin + + + + + aporphin + berbin + ergolin + + cepham + cephem + ceph-2-em + ceph-3-em + penam + penem + pen-2-em + 1-carbapen-1-em|1-carba-pen-1-em|carbapen-1-em|carba-pen-1-em + + + + lysergic acid + isolysergic acid + lysergate|lysergat + isolysergate|isolysergat + lysergamide|lysergamid + isolysergamide|isolysergamid + lysergol + isolysergol + + diff -Nru 
opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/nonCarboxylicAcids.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/nonCarboxylicAcids.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/nonCarboxylicAcids.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/nonCarboxylicAcids.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,322 +1,258 @@ - - - - - - - - - - - - arsor - arsen - azor - nitror - phosphor - orthophosphor - ortho-phosphor - stibor - antimon - sulfur - - - carbam - carbanil - carbon - mesyl - sulfam - sulfinam - sulfenam - trifl - - - - - arsin - arson - azin - azon - phosphen - phosphin - phosphon - stibin - stibon - - - - - - arsenen - bor - orthobor - ortho-bor - borin - boron - selen - tellur - - chrom - dichrom - mangan - permangan - technet - pertechnet - rhen - perrhen - - dibor - diboron - diphosphon - diphosphor - pyrophosphor - pyro-phosphor - diarson - diarsor - distibon - distibor - ditellur - diselen - disulfur - pyrosulfur - pyro-sulfur - hypodiboron - hypodiphosphon - hypodiphosphor - hypodiarson - hypodiarsor - hypodistibon - hypodistibor - hypodisulfur - dithion - hypodiselen - hypoditellur - dicarbon - disilic - - triphosphon - triphosphor - triselen - trisulfur - tricarbon - - tetraphosphor - tetracarbon - - trithion - tetrathion - pentathion - - - - - - - cacodyl - ethion - hyponitr - isethion - isohypophosphor - methion - nitron - orthocarbon - ortho-carbon - orthoform - ortho-form - orthoacet - ortho-acet - orthopropion - ortho-propion - orthobutyr - ortho-butyr - orthovaler - ortho-valer - orthotellur - ortho-tellur - hypobor - peracet - perform - pyrocarbon - pyro-carbon - sulfamid - sulfoxyl - - - - - amidosulfonate - amidosulfonicacid - amidosulfonic acid - arsenious - arseniousacid - arsenious acid - bicarbonate - bisulfate - bisulfite - diphosphate - diphosphite - 
diselenious - diseleniousacid - diselenious acid - disulfate - pyrosulfate - pyro-sulfate - disulfite - pyrosulfite - pyro-sulfite - metabisulfate - metabisulfite - hydrosulfite - hypodiphosphate - hypodiphosphite - hypophosphate - hypophosphorate - hypophosphoric - hypophosphoricacid - hypophosphoric acid - hypophosphite - hypophosphorite - hypophosphorous - hypophosphorousacid - hypophosphorous acid - isohypophosphate - orthophosphate - ortho-phosphate - orthophosphite - ortho-phosphite - pentaphosphate - persulfate - phosphite - phosphate - phosphoate - pyrophosphate - pyro-phosphate - pyrophosphite - pyro-phosphite - selenious - seleniousacid - selenious acid - selenite - selenate - selenoate - sulfite - sulfate - sulfoate - tellurite - tellurate - telluroate - tetraphosphate - triphosphate - - - bromic - bromicacid - bromic acid - bromous - bromousacid - bromous acid - chloric - chloricacid - chloric acid - chlorous - chlorousacid - chlorous acid - fluoric - fluoricacid - fluoric acid - fluorous - fluorousacid - fluorous acid - iodic - iodicacid - iodic acid - iodous - iodousacid - iodous acid - hypobromous - hypobromousacid - hypobromous acid - hypochlorous - hypochlorousacid - hypochlorous acid - hypofluorous - hypofluorousacid - hypofluorous acid - hypoiodous - hypoiodousacid - hypoiodous acid - metaperiodic - metaperiodicacid - metaperiodic acid - nitric - nitricacid - nitric acid - nitrous - nitrousacid - nitrous acid - orthoperiodic - orthoperiodicacid - orthoperiodic acid - ortho-periodic acid - silicic - silicicacid - silicic acid - orthosilicic - orthosilicicacid - orthosilicic acid - ortho-silicic acid - perbromic - perbromicacid - perbromic acid - perchloric - perchloricacid - perchloric acid - perfluoric - perfluoricacid - perfluoric acid - periodic - periodicacid - periodic acid - - bromate - bromite - chlorate - chlorite - fluorate - fluorite - iodate - iodite - hypobromite - hypochlorite - hypofluorite - hypoiodite - metaperiodate - nitrate - 
nitrite - orthoperiodate - ortho-periodate - silicate - orthosilicate - ortho-silicate - perbromate - perchlorate - perfluorate - periodate - - - - - sulfon - sulfin - thion - sulfen - selenon - selenin - selenen - telluron - tellurin - telluren - - \ No newline at end of file + + + + + + + + + + + + arsor + arsen + azor + nitror + phosphor + orthophosphor + ortho-phosphor + stibor + antimon + sulfur + + + carbam + carbanil + carbon + mesyl + diphosphor + pyrophosphor + pyro-phosphor + sulfam + sulfinam + sulfenam + + + + + arsin + arson + azin + azon + phosphen + phosphin + phosphon + stibin + stibon + + + + + + arsenen + bor + orthobor + ortho-bor + borin + boron + selen + tellur + + chrom + dichrom + mangan + permangan + technet + pertechnet + rhen + perrhen + perruthen + + dibor + diboron + diphosphon + diarson + diarsor + distibon + distibor + ditellur + diselen + disulfur|pyrosulfur|pyro-sulfur + hypodiboron + hypodiphosphon + hypodiphosphor + hypodiarson + hypodiarsor + hypodistibon + hypodistibor + hypodisulfur|dithion + hypodiselen + hypoditellur + dicarbon + disilic + + triphosphon + triphosphor + triselen + trisulfur + tricarbon + + tetraphosphor + tetracarbon + + trithion + tetrathion + pentathion + + + + + besyl + brosyl + cacodyl + edisyl + esyl + ethion + hyponitr + isethion + isohypophosphor + methion + nitron + nonafl + nosyl + orthocarbon|ortho-carbon + orthoform|ortho-form + orthoacet|ortho-acet + orthopropion|ortho-propion + orthobutyr|ortho-butyr + orthoisobutyr|ortho-isobutyr + orthovaler|ortho-valer + orthoisovaler|ortho-isovaler + orthotellur|ortho-tellur + hypobor + pyrocarbon|pyro-carbon + sulfamid + sulfoxyl + tosyl + tresyl + + + + + amidosulfonate|amidosulfonat + amidosulfonicacid|amidosulfonic acid + arsenious|arseniousacid|arsenious acid + bicarbonate|bicarbonat + bisulfate|bisulfat + bisulfite|bisulfit + diphosphate|diphosphat + diphosphite|diphosphit + diselenious|diseleniousacid|diselenious acid + 
disulfate|pyrosulfate|pyro-sulfate|disulfat|pyrosulfat|pyro-sulfat + disulfite|disulfit + pyrosulfite|pyro-sulfite|pyrosulfit|pyro-sulfit + metabisulfate|metabisulfat + metabisulfite|metabisulfit + hydrosulfite|hydrosulfit + hypothiocyanate|hypothiocyanat + hypothiocyanicacid|hypothiocyanic acid + hypothiocyanite|hypothiocyanit + hypothiocyanousacid|hypothiocyanous acid + hypodiphosphate|hypodiphosphat + hypodiphosphite|hypodiphosphit + hypophosphate|hypophosphorate|hypophosphat|hypophosphorat + hypophosphoric|hypophosphoricacid|hypophosphoric acid + hypophosphite|hypophosphorite|hypophosphit|hypophosphorit + hypophosphorous|hypophosphorousacid|hypophosphorous acid + isohypophosphate|isohypophosphat + orthonitrate|ortho-nitrate|orthonitrat|ortho-nitrat + orthophosphate|ortho-phosphate|orthophosphat|ortho-phosphat + orthophosphite|ortho-phosphite|orthophosphit|ortho-phosphit + pentaphosphate|pentaphosphat + peroxodicarbonate|peroxodicarbonat + peroxodicarbonicacid|peroxodicarbonic acid + peroxocarbonate|peroxocarbonat + peroxocarbonicacid|peroxocarbonic acid + persulfate|persulfat + perxenate|perxenat + perxenicacid|perxenic acid + phosphite|phosphit + phosphate|phosphoate|phosphat + pyrophosphate|pyro-phosphate|pyrophosphat|pyro-phosphat + pyrophosphite|pyro-phosphite|pyrophosphit|pyro-phosphit + selenious|seleniousacid|selenious acid + selenite|selenit + selenate|selenoate|selenat + sulfite|sulfit + sulfate|sulfoate|sulfat + peroxomonosulfate|peroxymonosulfate|peroxomonosulfat|peroxymonosulfat + peroxomonosulfuric|peroxomonosulfuricacid|peroxomonosulfuric acid|peroxymonosulfuric|peroxymonosulfuricacid|peroxymonosulfuric acid + tellurite|tellurit + tellurate|telluroate|tellurat + triflic|triflicacid|triflic acid + triflate|triflat + tetraphosphate|tetraphosphat + triphosphate|triphosphat + + + bromic|bromicacid|bromic acid + bromous|bromousacid|bromous acid + chloric|chloricacid|chloric acid + chlorous|chlorousacid|chlorous acid + fluoric|fluoricacid|fluoric acid + 
fluorous|fluorousacid|fluorous acid + iodic|iodicacid|iodic acid + iodous|iodousacid|iodous acid + hypobromous|hypobromousacid|hypobromous acid + hypochlorous|hypochlorousacid|hypochlorous acid + hypofluorous|hypofluorousacid|hypofluorous acid + hypoiodous|hypoiodousacid|hypoiodous acid + metaperiodic|metaperiodicacid|metaperiodic acid + nitric|nitricacid|nitric acid + nitrous|nitrousacid|nitrous acid + orthoperiodic|orthoperiodicacid|orthoperiodic acid|ortho-periodic acid + silicic|silicicacid|silicic acid|orthosilicic|orthosilicicacid|orthosilicic acid|ortho-silicic acid + perbromic|perbromicacid|perbromic acid + perchloric|perchloricacid|perchloric acid + perfluoric|perfluoricacid|perfluoric acid + periodic|periodicacid|periodic acid + + bromate|bromat + bromite|bromit + chlorate|chlorat + chlorite|chlorit + fluorate|fluorat + fluorite|fluorit + iodate|iodat + iodite|iodit + hypobromite|hypobromit + hypochlorite|hypochlorit + hypofluorite|hypofluorit + hypoiodite|hypoiodit + metaperiodate|metaperiodat + nitrate|nitrat + nitrite|nitrit + orthoperiodate|ortho-periodate|orthoperiodat|ortho-periodat + silicate|orthosilicate|ortho-silicate|silicat|orthosilicat|ortho-silicat + perbromate|perbromat + perchlorate|perchlorat + perfluorate|perfluorat + periodate|periodat + + + + + sulfon + sulfin|thion + sulfen + selenon + selenin + selenen + telluron + tellurin + telluren + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexes.dtd opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexes.dtd --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexes.dtd 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexes.dtd 2017-07-23 20:55:18.000000000 +0000 @@ -2,4 +2,4 @@ \ No newline at end of file + value CDATA #REQUIRED> diff -Nru 
opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexes.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexes.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexes.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexes.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,6 +1,7 @@ - + - +--> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - \ No newline at end of file + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexTokenList.dtd opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexTokenList.dtd --- 
opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexTokenList.dtd 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexTokenList.dtd 2017-07-23 20:55:18.000000000 +0000 @@ -10,5 +10,5 @@ type CDATA #IMPLIED subType CDATA #IMPLIED value CDATA #IMPLIED - determinise CDATA #IMPLIED - ignoreWhenWritingXML CDATA #IMPLIED> \ No newline at end of file + determinise (yes) #IMPLIED + ignoreWhenWritingXML (yes) #IMPLIED> diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexTokens.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexTokens.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexTokens.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/regexTokens.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,71 +1,81 @@ - - + determinise(optional) == Tells the parse to convert this regex into a DFA for faster execution. Note that only a subset of regular expression syntax is supported by the DFA software. This should be done for any regex which has significant branching + type(optional) == Sets the type attribute on the XML element + subType(optional) == Sets the subType attribute on the XML element + value(optional) == Sets the value attribute on the XML element + ignoreWhenWritingXML(optional) == Tells the parser not to form an XML element out of these. 
That means that these entries are only syntactic and contain no useful information and are not useful after parsing has completed +--> - - - - - - - - - + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalRegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalRegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalRegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalRegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ -796035260 \ No newline at end of file +-574983451 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ -796035260 \ No newline at end of file +-574983451 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_SerialisedAutomaton.aut and 
/tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemical_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalSerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/chemicalSerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/colonOrSemiColonDelimitedLocant_76RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/colonOrSemiColonDelimitedLocant_76RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/colonOrSemiColonDelimitedLocant_76RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/colonOrSemiColonDelimitedLocant_76RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ -1940430258 \ No newline at end of file +-1093235916 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/colonOrSemiColonDelimitedLocant_76_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/colonOrSemiColonDelimitedLocant_76_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/colonOrSemiColonDelimitedLocant_76_reversed_RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/colonOrSemiColonDelimitedLocant_76_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ -1940430258 \ No newline at 
end of file +-1093235916 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/colonOrSemiColonDelimitedLocant_76_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/colonOrSemiColonDelimitedLocant_76_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/colonOrSemiColonDelimitedLocant_76SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/colonOrSemiColonDelimitedLocant_76SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/indicatedHydrogen_101RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/indicatedHydrogen_101RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/indicatedHydrogen_101RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/indicatedHydrogen_101RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ -232336742 \ No newline at end of file +-1630294980 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/indicatedHydrogen_101_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/indicatedHydrogen_101_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/indicatedHydrogen_101_reversed_RegexHash.txt 2013-07-21 
14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/indicatedHydrogen_101_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ -232336742 \ No newline at end of file +-1630294980 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/indicatedHydrogen_101_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/indicatedHydrogen_101_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/indicatedHydrogen_101SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/indicatedHydrogen_101SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_255RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_255RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_255RegexHash.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_255RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1 @@ +-131769061 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_255_reversed_RegexHash.txt 
opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_255_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_255_reversed_RegexHash.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_255_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1 @@ +-131769061 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_255_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_255_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_255SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_255SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_256RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_256RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_256RegexHash.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_256RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1 @@ +-422363862 \ No newline at end of file diff -Nru 
opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_256_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_256_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_256_reversed_RegexHash.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_256_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1 @@ +-422363862 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_256_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_256_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_256SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/isotopeSpecification_256SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lambdaConvention_161RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lambdaConvention_161RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lambdaConvention_161RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ 
opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lambdaConvention_161RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ -1540732435 \ No newline at end of file +-1814313413 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lambdaConvention_161_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lambdaConvention_161_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lambdaConvention_161_reversed_RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lambdaConvention_161_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ -1540732435 \ No newline at end of file +-1814313413 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lambdaConvention_161_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lambdaConvention_161_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lambdaConvention_161SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lambdaConvention_161SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lightRotation_196RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lightRotation_196RegexHash.txt --- 
opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lightRotation_196RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lightRotation_196RegexHash.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ --21120730 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lightRotation_196_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lightRotation_196_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lightRotation_196_reversed_RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lightRotation_196_reversed_RegexHash.txt 1970-01-01 00:00:00.000000000 +0000 @@ -1 +0,0 @@ --21120730 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lightRotation_196_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lightRotation_196_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lightRotation_196SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/lightRotation_196SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/locant_108RegexHash.txt 
opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/locant_108RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/locant_108RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/locant_108RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --1799563333 \ No newline at end of file +722376024 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/locant_108_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/locant_108_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/locant_108_reversed_RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/locant_108_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --1799563333 \ No newline at end of file +722376024 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/locant_108_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/locant_108_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/locant_108SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/locant_108SerialisedAutomaton.aut differ diff -Nru 
opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiro_83RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiro_83RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiro_83RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiro_83RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --1289108517 \ No newline at end of file +888073889 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiro_83_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiro_83_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiro_83_reversed_RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiro_83_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --1289108517 \ No newline at end of file +888073889 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiro_83_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiro_83_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiro_83SerialisedAutomaton.aut and 
/tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiro_83SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiroLocant_201RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiroLocant_201RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiroLocant_201RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiroLocant_201RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --794234790 \ No newline at end of file +-270365898 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiroLocant_201_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiroLocant_201_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiroLocant_201_reversed_RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiroLocant_201_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --794234790 \ No newline at end of file +-270365898 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiroLocant_201_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiroLocant_201_reversed_SerialisedAutomaton.aut differ Binary files 
/tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiroLocant_201SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/spiroLocant_201SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_185RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_185RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_185RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_185RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --1380502035 \ No newline at end of file +-884703251 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_185_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_185_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_185_reversed_RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_185_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --1380502035 \ No newline at end of file +-884703251 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_185_reversed_SerialisedAutomaton.aut and 
/tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_185_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_185SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_185SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_196RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_196RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_196RegexHash.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_196RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1 @@ +-21120730 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_196_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_196_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_196_reversed_RegexHash.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_196_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1 @@ +-21120730 \ No newline at end of file Binary files 
/tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_196_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_196_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_196SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_196SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_202RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_202RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_202RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_202RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --2040848597 \ No newline at end of file +-230882808 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_202_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_202_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_202_reversed_RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ 
opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_202_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --2040848597 \ No newline at end of file +-230882808 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_202_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_202_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_202SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_202SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_230RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_230RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_230RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_230RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ -86186355 \ No newline at end of file +1608971995 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_230_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_230_reversed_RegexHash.txt --- 
opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_230_reversed_RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_230_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ -86186355 \ No newline at end of file +1608971995 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_230_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_230_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_230SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_230SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_69RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_69RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_69RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_69RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --644989176 \ No newline at end of file +-1020266283 \ No newline at end of file diff -Nru 
opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_69_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_69_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_69_reversed_RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_69_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --644989176 \ No newline at end of file +-1020266283 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_69_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_69_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_69SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/stereoChemistry_69SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/vonBaeyer_66RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/vonBaeyer_66RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/vonBaeyer_66RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/vonBaeyer_66RegexHash.txt 2017-07-23 
20:55:18.000000000 +0000 @@ -1 +1 @@ --199616182 \ No newline at end of file +-1917907032 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/vonBaeyer_66_reversed_RegexHash.txt opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/vonBaeyer_66_reversed_RegexHash.txt --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/vonBaeyer_66_reversed_RegexHash.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/vonBaeyer_66_reversed_RegexHash.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1 @@ --199616182 \ No newline at end of file +-1917907032 \ No newline at end of file Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/vonBaeyer_66_reversed_SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/vonBaeyer_66_reversed_SerialisedAutomaton.aut differ Binary files /tmp/tmp6CBcZy/SdQwSMS2Pq/opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/vonBaeyer_66SerialisedAutomaton.aut and /tmp/tmp6CBcZy/AUh1rCgBZF/opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/serialisedAutomata/vonBaeyer_66SerialisedAutomaton.aut differ diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleCyclicGroups.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleCyclicGroups.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleCyclicGroups.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleCyclicGroups.xml 2017-07-23 
20:55:18.000000000 +0000 @@ -1,166 +1,151 @@ - - - - - - alpha-asarone - beta-asarone - alpha-furil - 2,2'-furil - 2,2'-furoin - abietamide - acetovanillone - anise alcohol - anisil - p-anisil - anisoin - p-anisoin - anthra-1,2-quinone - anthra-1,4-quinone - anthra-9,10-quinone - apocynin - barbituric - barbituricacid - barbituric acid - benzil - benzo-1,2-quinone - benzo-1,4-quinone - benzol - benzopinacol - benzopinacolone - benzosemiquinone - bisphenol a - bourbonal - carvacrol - chavicol - coniferol - creosol - cumaldehyde - cuminic acid - cuminicacid - cumic acid - cumicacid - cuminal - cuminaldehyde - cuminol - curcumin - dibenzamid - dibenzamide - diphenate - diphenic - diphenicacid - diphenic acid - durohydroquinone - dypnone - elemicin - estragole - ethyl vanillin - ethylvanillin - homoisovanillin - homovanillin - o-homovanillin - isatic - isaticacid - isatic acid - isatoic - isatoicacid - isatoic acid - isoelemicin - isoeugenol - isophthalyl alcohol - isovanillin - quinol - mesitylenate - mesitylenic - mesitylenic acid - mesitylenicacid - naphtho-1,2-quinone - naphtho-1,4-quinone - nicotine - olivetol - orcinol - perbenzoate - perbenzoic - perbenzoicacid - perbenzoic acid - phthalyl alcohol - phenol - phenolate - phenylium - phloretin - phlorol - picrate - picric - picricacid - picric acid - pseudocumohydroquinone - pterin - dihydropterin - tetrahydropterin - quinolinic - quinolinicacid - quinolinic acid - methanopterin - tetrahydromethanopterin - 5,6,7,8-tetrahydromethanopterin - resveratrol - resorcin - rhapontigenin - saligenin - diethylstilbestrol - styphnate - styphnic - styphnicacid - styphnic acid - styrol - sydnone - terephthalyl alcohol - thymohydroquinone - thymol - toluhydroquinone - toluol - tribenzamid - tribenzamide - uric - uricacid - uric acid - vanillin - o-vanillin - xylol - o-xylohydroquinone - ortho-xylohydroquinone - m-xylohydroquinone - meta-xylohydroquinone - p-xylohydroquinone - para-xylohydroquinone - zingerone - - - - - 
5'-adenylic - 5'-adenylicacid - 5'-adenylic acid - 5'-thymidylic - 5'-thymidylicacid - 5'-thymidylic acid - 5'-guanylic - 5'-guanylicacid - 5'-guanylic acid - 5'-inosinic - 5'-inosinicacid - 5'-inosinic acid - 5'-xanthylic - 5'-xanthylicacid - 5'-xanthylic acid - 5'-cytidylic - 5'-cytidylicacid - 5'-cytidylic acid - 5'-uridylic - 5'-uridylicacid - 5'-uridylic acid - 5'-orotidylic - 5'-orotidylicacid - 5'-orotidylic acid - - \ No newline at end of file + + + + + 3-pyrazolone|3-pyrazolon + 4-pyrazolone|4-pyrazolon + 5-pyrazolone|5-pyrazolon + alpha-asarone|alpha-asaron + beta-asarone|beta-asaron + alpha-furil|2,2'-furil + 2,2'-furoin + abietamide|abietamid + acetovanillone|acetovanillon + anise alcohol + anisil|p-anisil + anisoin|p-anisoin + anthra-1,2-quinone|anthra-1,2-quinon + anthra-1,4-quinone|anthra-1,4-quinon + anthra-9,10-quinone|anthra-9,10-quinon + anthranil + apocynin + barbituric|barbituricacid|barbituric acid + benzil + benzo-1,2-quinone|benzo-1,2-quinon + benzo-1,4-quinone|benzo-1,4-quinon + benzol + benzopinacol + benzopinacolone|benzopinacolon + benzosemiquinone|benzosemiquinon + bisphenol a|bisphenol-a + bourbonal + carvacrol + catecholate|catecholat + chavicol + coniferol + creosol + cumaldehyde|cumaldehyd + cuminic acid|cuminicacid|cumic acid|cumicacid + cuminal|cuminaldehyde|cuminaldehyd + cuminol + curcumin + cyanuramide|cyanurotriamide|cyanurotriamine|cyanuramid|cyanurotriamid|cyanurotriamin + cyanuric bromide|cyanuryl bromide|cyanuric bromid|cyanuryl bromid + cyanuric chloride|cyanuryl chloride|cyanuric chlorid|cyanuryl chlorid + cyanuric fluoride|cyanuryl fluoride|cyanuric fluorid|cyanuryl fluorid + cyanuric iodide|cyanuryl iodide|cyanuric iodid|cyanuryl iodid + cyclopentadienide|cyclopentadienid + cyclopentadienylium + cyclotetraphosphazene|cyclotetraphosphazen + cyclotriphosphazene|cyclotriphosphazen + dibenzamide|dibenzamid + diphenate + diphenic|diphenicacid|diphenic acid + durohydroquinone|durohydroquinon + dypnone + elemicin + 
estragole|estragol + ethyl vanillin|ethylvanillin + homoisovanillin + homovanillin + o-homovanillin + imidazolate|imidazolat + isatic|isaticacid|isatic acid + isatoic|isatoicacid|isatoic acid + isoelemicin + isoeugenol + isophthalyl alcohol + isovanillin + + lutidine|lutidin + melamine|melamin + quinol + mesitylenate|mesitylenat + mesitylenic|mesitylenic acid|mesitylenicacid + naphtho-1,2-quinone|naphtho-1,2-quinon + naphtho-1,4-quinone|naphtho-1,4-quinon + nicotine|nicotin + olivetol + orcinol + perbenzoate|perbenzoat + perbenzoic|perbenzoicacid|perbenzoic acid + phthalyl alcohol + phenol + phenolate|phenolat + phenylium + phloretin + phlorol + picrate|picrat + picric|picricacid|picric acid + pseudocumohydroquinone|pseudocumohydroquinon + pterin + dihydropterin + tetrahydropterin + pyrocatecholate|pyrocatecholat + quinolinic|quinolinicacid|quinolinic acid + methanopterin + tetrahydromethanopterin|5,6,7,8-tetrahydromethanopterin + resveratrol + resorcin + rhapontigenin + saligenin + diethylstilbestrol + styphnate|styphnat + styphnic|styphnicacid|styphnic acid + styrene carbonate|styrene carbonat + styrol + sydnone|sydnon + terephthalyl alcohol + thymohydroquinone|thymohydroquinon + thymol + toluhydroquinone|toluhydroquinon + toluol + tribenzamide|tribenzamid + uric|uricacid|uric acid + vanillin + o-vanillin + xylol + o-xylohydroquinone|ortho-xylohydroquinone|o-xylohydroquinon|ortho-xylohydroquinon + m-xylohydroquinone|meta-xylohydroquinone|m-xylohydroquinon|meta-xylohydroquinon + p-xylohydroquinone|para-xylohydroquinone|p-xylohydroquinon|para-xylohydroquinon + zingerone|zingeron + + + + + 2'-adenylic|2'-adenylicacid|2'-adenylic acid + 3'-adenylic|3'-adenylicacid|3'-adenylic acid + 5'-adenylic|5'-adenylicacid|5'-adenylic acid + 3'-thymidylic|3'-thymidylicacid|3'-thymidylic acid + 5'-thymidylic|5'-thymidylicacid|5'-thymidylic acid + 2'-guanylic|2'-guanylicacid|2'-guanylic acid + 3'-guanylic|3'-guanylicacid|3'-guanylic acid + 5'-guanylic|5'-guanylicacid|5'-guanylic 
acid + 2'-inosinic|2'-inosinicacid|2'-inosinic acid + 3'-inosinic|3'-inosinicacid|3'-inosinic acid + 5'-inosinic|5'-inosinicacid|5'-inosinic acid|inosinic acid + thioinosinic acid + 2'-xanthylic|2'-xanthylicacid|2'-xanthylic acid + 3'-xanthylic|3'-xanthylicacid|3'-xanthylic acid + 5'-xanthylic|5'-xanthylicacid|5'-xanthylic acid + 2'-cytidylic|2'-cytidylicacid|2'-cytidylic acid + 3'-cytidylic|3'-cytidylicacid|3'-cytidylic acid + 5'-cytidylic|5'-cytidylicacid|5'-cytidylic acid + 2'-uridylic|2'-uridylicacid|2'-uridylic acid + 3'-uridylic|3'-uridylicacid|3'-uridylic acid + 5'-uridylic|5'-uridylicacid|5'-uridylic acid + 2'-orotidylic|2'-orotidylicacid|2'-orotidylic acid + 3'-orotidylic|3'-orotidylicacid|3'-orotidylic acid + 5'-orotidylic|5'-orotidylicacid|5'-orotidylic acid + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleGroups.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleGroups.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleGroups.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleGroups.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,424 +1,435 @@ - - - - - - acetoin - acetone cyanohydrin - acetoxime - acetoxylium - acetylide - acrolein - agmatine - allicin - aluminohydride - amidogen - aminoxide - aminoxylium - aminylium - - biacetyl - bismuth oxychloride - bismuthoxychloride - bitartrate - bicine - biisopropenyl - biisopropyl - biguanide - biguanidine - biuret - bombykol - borohydride - bromal - bromamine - bromamide - dibromamine - bromimide - bromoform - busulfan - butoxide - butoxylium - butyroin - cadaverine - camphor - camphorsulfonate - camphorsulfonic - camphorsulfonicacid - camphorsulfonic acid - capraldehyde - capramide - caprate - capric - capricacid - capric acid - caprinitrile - capriphenone - carbazone - carbinol - carnitine - l-carnitine - d-carnitine 
- carbodiazone - carbodiimide - carbonohydrazide - carbohydrazide - carbazide - cetane - cetanol - chloral - chloral hydrate - chloralhydrate - chloramine - chloramide - dichloramine - chlorimide - chloroform - chloropicrin - citral - citronellol - beta-citronellol - alpha-citronellol - citronellal - beta-citronellal - alpha-citronellal - crotonylene - cyanamide - cyanic - cyanicacid - cyanic acid - cyanogen - cyanogen azide - cyanogen bromide - cyanogen chloride - cyanogen fluoride - cyanogen iodide - cyanuric acid - cyanuricacid - dansylamide - diacetamide - diacetonamine - diacetone alcohol - dichlorvos - dicyan - dicyanamide - diethanolamine - dimercaprol - diglyme - diphosgene - eicosasphinganine - erythrene - ethoxide - ethoxylium - ethylene - alpha-farnesene - beta-farnesene - farnesol - ferricyanide - ferrocyanide - fluoramine - fluoramide - difluoramine - fluorimide - fluoroform - fulminicacid - fulminic acid - geranial - geraniol - glycerone - glyme - glyoxal - glyoxime - halothane - hexametapol - hexamethyldisilazide - hydroxide - hydroxylium - hydroperoxylium - hydroxycitronellal - icosasphinganine - imidogen - iodoform - iron oxychloride - ironoxychloride - isobutoxide - isobutylene - isocarbonohydrazide - isocyanic - isocyanicacid - isocyanic acid - isothiocyanic - isothiocyanicacid - isothiocyanic acid - isoselenocyanic - isoselenocyanicacid - isoselenocyanic acid - isotellurocyanic - isotellurocyanicacid - isotellurocyanic acid - isocyanuric acid - isocyanuricacid - isofulminicacid - isofulminic acid - isophorone - isophorone diisocyanate - isopropoxide - isoselenourea - isosemicarbazide - isotellurourea - isothiourea - isourea - itatartrate - laccerol - linalool - lignocerol - mesilate - mesityl oxide - methacrolein - methamidophos - methoxide - methoxylium - methylal - monoethanolamine - mono-ethanolamine - monoglyme - alpha-myrcene - beta-myrcene - neopentyl glycol - neral - nerol - nerolidol - neurine - nitramide - nitrenium - nitroform - 
nitroglycerin - nitrosonium - nitronium - nitrosamide - nitrous oxide - nitrousoxide - nitroxide - nitroxyl - alpha-ocimene - beta-ocimene - oxaldehyde - oxamide - oxylium - ozone - ozonide - penicillic - penicillicacid - penicillic acid - penicillate - pentaerythritol - dipentaerythritol - tripentaerythritol - pentaguanide - pentauret - peroxylium - phorone - phosgene - phosphoramide - phosphorus oxybromide - phosphorous oxybromide - phosphorusoxybromide - phosphorousoxybromide - phosphorus oxychloride - phosphorous oxychloride - phosphorusoxychloride - phosphorousoxychloride - phosphorus oxyfluoride - phosphorous oxyfluoride - phosphorusoxyfluoride - phosphorousoxyfluoride - phosphorus oxyiodide - phosphorous oxyiodide - phosphorusoxyiodide - phosphorousoxyiodide - phosphorus pentasulfide - phytol - phytosphingosine - pinacol - pinacolone - piperylene - pristane - propione - propionoin - propioin - propoxide - propoxylium - propylene - putrescine - rhodinol - rhodinal - rubeanic acid - sarin - sec-butoxide - secbutoxide - selenilimine - selenium oxybromide - seleniumoxybromide - selenoximide - selenoximine - semicarbazide - semioxamazide - soman - spermidine - spermine - sphinganine - sphingosine - squalane - squalene - sulfamide - sulfilimine - sulfimide - sulfoximide - sulfoximine - sulfoxonium - tabun - tartrate - d-tartrate - (d)-tartrate - d(-)-tartrate - d-(-)-tartrate - l-tartrate - (l)-tartrate - l(+)-tartrate - l-(+)-tartrate - tellurilimine - telluroximide - telluroximine - tert-butoxide - tertbutoxide - t-butoxide - tbutoxide - tetraglyme - tetraguanide - tetrauret - thiosinamine - thiuram monosulfide - thiuram disulfide - triacetin - triacetamide - tributyrin - trichlorohydrin - triclofos - triethanolamine - triguanide - triglyme - trilaurin - trimyristin - triolein - trioxygen - tripalmitin - triphosgene - triptane - tristearin - triuret - tropylium - urea - uronium - vinylene - xanthate - xanthic acid - xanthicacid - - ammonia - water - hydrate - 
carbenium - - hydrofluoride - hydrochloride - hydrobromide - hydroiodide - hydriodide - hydroastatide - - hydrofluoricacid - hydrofluoric acid - hydrochloricacid - hydrochloric acid - hydrobromicacid - hydrobromic acid - hydroiodicacid - hydroiodic acid - hydriodicacid - hydriodic acid - hydroastaticacid - hydroastatic acid - - hydrocyanicacid - hydrocyanic acid - hydroisocyanicacid - hydroisocyanic acid - hydrazoicacid - hydrazoic acid - - - - - - formamide - methanamide - dimethoxyethane - dimethylacetamide - - glyoxalylamide - - - carbanolate - carbanolat - chlorazine - chlorazin - chlorobenzilate - chlorobenzilat - diazinone - diazinon - dithianone - dithianon - dihydromethanophenazine - iodamide - methoxychlor - methanophenazine - oxolinic acid - oxolinate - oxybenzone - pyridate - pyridat - sulfosalicylic acid - stibogluconate - - - - choline - creatine - ethanolamine - glycocyamine - hypotaurocyamine - taurocyamine - vitamin c - coenzyme a - coa - - - - - amin - amine - aminium - aminide - barbiturate - carboxylate - carboxylic - carboxylicacid - carboxylic acid - nitrone - paraben - perselenurane - persulfurane - selenurane - sulfurane - - \ No newline at end of file + + + + + + acetoin + acetone cyanohydrin + acetoxime|acetoxim + acetoxylium + acetylide|acetylid + acrolein + agmatine|agmatin + allicin + aluminohydride|aluminohydrid + amidogen + aminoxide|aminoxid + aminoxylium + aminylium + ammoniumolate|ammoniumolat + anandamide|anandamid + arsin + + biacetyl + bicine|bicin + bifluoride|bifluorid + biisopropenyl + biisopropyl + biguanide|biguanid + biguanidine|biguanidin + bismuth oxychloride|bismuthoxychloride|bismuth oxychlorid|bismuthoxychlorid + bistriflimide|bistriflimid + bisulfide|bisulfid + bitartrate|bitartrat + biurea + biuret + bombykol + borohydride|borohydrid + bromal + bromamine|bromamide|bromamin|bromamid + dibromamine|bromimide|dibromamin|bromimid + bromoform + busulfan + butoxide|n-butoxide|butoxid|n-butoxid + butoxylium + butyroin + 
cadaverine|cadaverin + camphor + camphorsulfonate|camphorsulfonat + camphor-10-sulfonate|camphor-10-sulfonat + camphorsulfonic|camphorsulfonicacid|camphorsulfonic acid + camphor-10-sulfonic|camphor-10-sulfonicacid|camphor-10-sulfonic acid + capraldehyde|capraldehyd + capramide|capramid + caprate|caprat + capric|capricacid|capric acid + caprinitrile|caprinitril + capriphenone|capriphenon + carbazone|carbazon + carbinol + carbodiazone|carbodiazon + carbodiimide|carbodiimid + carbonohydrazide|carbohydrazide|carbazide|carbonohydrazid|carbohydrazid|carbazid + cetane + cetanol + chloral + chloral formamide|chloralformamide|chloral formamid|chloralformamid + chloral hydrate|chloralhydrate|chloral hydrat|chloralhydrat + chloramine|chloramide|chloramin|chloramid + dichloramine|chlorimide|dichloramin|chlorimid + chloroform + chloropicrin + citral + citronellol|beta-citronellol + alpha-citronellol + citronellal|beta-citronellal + alpha-citronellal + crotonylene|crotonylen + cyanamide + cyanic|cyanicacid|cyanic acid + cyanogen + cyanogen azide|cyanogen azid + cyanogen bromide|cyanogen bromid + cyanogen chloride|cyanogen chlorid + cyanogen fluoride|cyanogen fluorid + cyanogen iodide|cyanogen iodid + cyanuric acid|cyanuricacid + dansylamide|dansylamid + diacetamide|diacetamid + diacetonamine|diacetonamin + diacetone alcohol + dichlorvos + dicyan + dicyanamide|dicyanamid + diethanolamine|diethanolamin + dimercaprol + diglyme + diphosgene|diphosgen + epibromohydrin + epichlorohydrin + epifluorohydrin + epiiodohydrin + erythrene|erythren + ethoxide|ethoxid + ethoxylium + ethylene|ethylen + alpha-farnesene|alpha-farnesen + beta-farnesene|beta-farnesen + farnesol + felbinac + ferricyanide|ferricyanid + ferrocyanide|ferrocyanid + fluoramine|fluoramide|fluoramin|fluoramid + difluoramine|fluorimide|difluoramin + fluoroform + fulminicacid|fulminic acid + gallohydride|gallohydrid + geranial + geraniol + glycerol carbonate|glycerin carbonate|glyceryl carbonate|glycerol carbonat|glycerin 
carbonat|glyceryl carbonat + glyme + glyoxal + glyoxime|glyoxim + halothane|halothan + hexametapol + hexamethyldisilazide|hexamethyldisilazid + hydronium + hydroxide|hydroxid + hydroxylium + hydroperoxylium + hydroxycitronellal + ibuprofen + imidogen + iodoform + iron oxychloride|ironoxychloride|iron oxychlorid|ironoxychlorid + isobutoxide|iso-butoxide|isobutoxid|iso-butoxid|isobutylate|iso-butylate|isobutylat|iso-butylat + isobutylene|isobutylen + isocarbonohydrazide|isocarbonohydrazid + isocyanic|isocyanicacid|isocyanic acid + isothiocyanic|isothiocyanicacid|isothiocyanic acid + isoselenocyanic|isoselenocyanicacid|isoselenocyanic acid + isotellurocyanic|isotellurocyanicacid|isotellurocyanic acid + isocyanuric acid|isocyanuricacid + isofulminicacid|isofulminic acid + isophorone|isophoron + isophorone diisocyanate|isophorone diisocyanat|isophoron diisocyanat + isopropoxide|iso-propoxide|isopropoxid|iso-propoxid + isoselenourea + isosemicarbazide|isosemicarbazid + isotellurourea + isothiourea + isourea + itatartrate|itatartrat + ketoprofen + laccerol + linalool + lignocerol + mesilate|mesilat + mesityl oxide|mesityl oxid + methacrolein + methamidophos + methoxide|methoxid + methoxylium + methylal + monoethanolamine|mono-ethanolamine|monoethanolamin|mono-ethanolamin + monoglyme + alpha-myrcene|alpha-myrcen + beta-myrcene|beta-myrcen + neopentyl glycol + neral + nerol + nerolidol + neurine + nitramide|nitramid + nitrenium + nitroform + nitroglycerin + nitrosonium + nitronium + nitrosamide|nitrosamid + nitrous oxide|nitrousoxide|nitrous oxid|nitrousoxid + nitroxide|nitroxid + nitroxyl + alpha-ocimene|alpha-ocimen + beta-ocimene|beta-ocimen + oxaldehyde|oxaldehyd + oxamide|oxamid + oxylium + ozone + ozonide|ozonid + penicillic|penicillicacid|penicillic acid + penicillate|penicillat + pentaerythritol + dipentaerythritol + tripentaerythritol + pentaguanide|pentaguanid + pentauret + peracetic|peraceticacid|peracetic acid + peracetate|peracetat + 
performic|performicacid|performic acid + performate + peroxylium + phorone + phosgene|phosgen + phosphin + phosphoramide|phosphoramid + + phosphorus oxybromide|phosphorusoxybromide|phosphorous oxybromide|phosphorousoxybromide|phosphorus oxybromid|phosphorusoxybromid|phosphorous oxybromid|phosphorousoxybromid + phosphorus oxychloride|phosphorusoxychloride|phosphorous oxychloride|phosphorousoxychloride|phosphorus oxychlorid|phosphorusoxychlorid|phosphorous oxychlorid|phosphorousoxychlorid + phosphorus oxyfluoride|phosphorusoxyfluoride|phosphorous oxyfluoride|phosphorousoxyfluoride|phosphorus oxyfluorid|phosphorusoxyfluorid|phosphorous oxyfluorid|phosphorousoxyfluorid + phosphorus oxyiodide|phosphorusoxyiodide|phosphorous oxyiodide|phosphorousoxyiodide|phosphorus oxyiodid|phosphorusoxyiodid|phosphorous oxyiodid|phosphorousoxyiodid + phosphorus pentasulfide|phosphorus pentasulfid + phytol + pinacol + pinacolone|pinacolon + piperylene|piperylen + pristane|pristan + propione + propionoin|propioin + propoxide|n-propoxide|propoxid|n-propoxid + propoxylium + propylene|propylen + prussicacid|prussic acid + pseudoselenourea + pseudotellurourea + pseudothiourea + pseudourea + putrescine|putrescin + rhodinol + rhodinal + rubeanic acid + sarin + sec-butoxide|secbutoxide|sec-butoxid|secbutoxid|sec-butylate|secbutylate|sec-butylat|secbutylat + selenilimine|selenilimin + selenium oxybromide|seleniumoxybromide|selenium oxybromid|seleniumoxybromid + selenoximide|selenoximine|selenoximid|selenoximin + semicarbazide|semicarbazid + semioxamazide|semioxamazid + soman + spermidine|spermidin + spermine|spermin + squalane|squalan + squalene|squalen + sulfamide|sulfamid + sulfilimine|sulfimide|sulfilimin|sulfimid + tabun + tartrate|tartrat + d-tartrate|(d)-tartrate|d(-)-tartrate|d-(-)-tartrate|d-tartrat|(d)-tartrat|d(-)-tartrat|d-(-)-tartrat + l-tartrate|(l)-tartrate|l(+)-tartrate|l-(+)-tartrate|l-tartrat|(l)-tartrat|l(+)-tartrat|l-(+)-tartrat + tellurilimine|tellurilimin + 
telluroximide|telluroximine|telluroximid|telluroximin + tertiary-butoxide|tertiarybutoxide|tert-butoxide|tertbutoxide|tert.-butoxide|tert.butoxide|t-butoxide|tbutoxide|tertiary-butoxid|tertiarybutoxid|tert-butoxid|tertbutoxid|tert.-butoxid|tert.butoxid|t-butoxid|tbutoxid|tertiary-butylate|tertiarybutylate|tert-butylate|tertbutylate|tert.-butylate|tert.butylate|t-butylate|tertiary-butylat|tertiarybutylat|tert-butylat|tertbutylat|tert.-butylat|tert.butylat|t-butylat + tertiary-pentoxide|tertiarypentoxide|tert-pentoxide|tertpentoxide|tert.-pentoxide|tert.pentoxide|t-pentoxide|tertiary-pentoxid|tertiarypentoxid|tert-pentoxid|tertpentoxid|tert.-pentoxid|tert.pentoxid|t-pentoxid|tertiary-pentylate|tertiarypentylate|tert-pentylate|tertpentylate|tert.-pentylate|tert.pentylate|t-pentylate|tertiary-pentylat|tertiarypentylat|tert-pentylat|tertpentylat|tert.-pentylat|tert.pentylat|t-pentylat|tertiary-amoxide|tertiaryamoxide|tert-amoxide|tertamoxide|tert.-amoxide|tert.amoxide|t-amoxide|tertiary-amoxid|tertiaryamoxid|tert-amoxid|tertamoxid|tert.-amoxid|tert.amoxid|t-amoxid|tertiary-amylate|tertiaryamylate|tert-amylate|tertamylate|tert.-amylate|tert.amylate|t-amylate|tertiary-amylat|tertiaryamylat|tert-amylat|tertamylat|tert.-amylat|tert.amylat|t-amylat + tetrabromoaluminate|tetrabromoaluminat + tetrachloroaluminate|tetrachloroaluminat + tetraglyme + tetrafluoroaluminate|tetrafluoroaluminat + tetraiodoaluminate|tetraiodoaluminat + tetraguanide|tetraguanid + tetrauret + tetrahydroaluminate|tetrahydroaluminat + tetrahydroborate|tetrahydroborat + tetrahydrogallate|tetrahydrogallat + thiosinamine|thiosinamin + thiuram monosulfide|thiuram monosulfid + thiuram disulfide|thiuram disulfid + triacetin + triacetamide|triacetamid + tributyrin + trichlorohydrin + triclofos + triethanolamine|triethanolamin + triflimide|triflimid + triflimidate|triflimidat + triflimidic acid + triguanide|triguanid + triglyme + trilaurin + trimyristin + triolein + trioxygen + tripalmitin + 
triphosgene|triphosgen + triptane + tristearin + triuret + trometamol|tromethamine|tromethamin|tromethane|tromethan + tropylium + uranyl + urea + uronium + vinylene|vinylen + xanthate|xanthat + xanthic acid|xanthicacid + + ammonia + water + carbenium + + hydrofluoride|hydrofluorid + hydrochloride|hydrochlorid + hydrobromide|hydrobromid + hydroiodide|hydriodide|hydroiodid|hydriodid + hydroastatide|hydroastatid + + hydrofluoricacid|hydrofluoric acid + hydrochloricacid|hydrochloric acid + hydrobromicacid|hydrobromic acid + hydroiodicacid|hydroiodic acid|hydriodicacid|hydriodic acid + hydroastaticacid|hydroastatic acid + + hydrocyanicacid|hydrocyanic acid + hydroisocyanicacid|hydroisocyanic acid + hydrazoicacid|hydrazoic acid + + + + bis((trifluoromethyl)sulfonyl)imide|bis(trifluoromethane)sulfonimide|bis(trifluoromethanesulfonyl)imide|bis(trifluoromethylsulfonyl)imide|bistrifluoromethanesulfonimide|bis[(trifluoromethyl)sulfonyl]imide|trifluoromethanesulfonimide|bis((trifluoromethyl)sulfonyl)imid|bis(trifluoromethane)sulfonimid|bis(trifluoromethanesulfonyl)imid|bis(trifluoromethylsulfonyl)imid|bistrifluoromethanesulfonimid|bis[(trifluoromethyl)sulfonyl]imid|trifluoromethanesulfonimid + bis((pentafluoroethyl)sulfonyl)imide|bis((perfluoroethyl)sulfonyl)imide|bis(pentafluoroethanesulfonyl)imide|bis(pentafluoroethylsulfonyl)imide|bis(perfluoroethanesulfonyl)imide|bis(perfluoroethylsulfonyl)imide|bispentafluoroethylsulfonylimide|bisperfluoroethylsulfonylimide|bis[(pentafluoroethyl)sulfonyl]imide|bis[(perfluoroethyl)sulfonyl]imide|bis((pentafluoroethyl)sulfonyl)imid|bis((perfluoroethyl)sulfonyl)imid|bis(pentafluoroethanesulfonyl)imid|bis(pentafluoroethylsulfonyl)imid|bis(perfluoroethanesulfonyl)imid|bis(perfluoroethylsulfonyl)imid|bispentafluoroethylsulfonylimid|bisperfluoroethylsulfonylimid|bis[(pentafluoroethyl)sulfonyl]imid|bis[(perfluoroethyl)sulfonyl]imid + 
tris((trifluoromethyl)sulfonyl)methide|tris(trifluoromethanesulfonyl)methide|tris(trifluoromethylsulfonyl)methide|tris[(trifluoromethyl)sulfonyl]methide|tris((trifluoromethyl)sulfonyl)methid|tris(trifluoromethanesulfonyl)methid|tris(trifluoromethylsulfonyl)methid|tris[(trifluoromethyl)sulfonyl]methid + tris((pentafluoroethyl)sulfonyl)methide|tris((perfluoroethyl)sulfonyl)methide|tris(pentafluoroethanesulfonyl)methide|tris(pentafluoroethylsulfonyl)methide|tris(perfluoroethanesulfonyl)methide|tris(perfluoroethylsulfonyl)methide|tris[(pentafluoroethyl)sulfonyl]methide|tris[(perfluoroethyl)sulfonyl]methide|tris((pentafluoroethyl)sulfonyl)methid|tris((perfluoroethyl)sulfonyl)methid|tris(pentafluoroethanesulfonyl)methid|tris(pentafluoroethylsulfonyl)methid|tris(perfluoroethanesulfonyl)methid|tris(perfluoroethylsulfonyl)methid|tris[(pentafluoroethyl)sulfonyl]methid|tris[(perfluoroethyl)sulfonyl]methid + + + + + + azobisisobutyronitrile|azobis-isobutyronitrile|azobisisobutyronitril|azobis-isobutyronitril + diphenylethylenediamine|diphenylethylenediamin + dicyclohexylurea + diepoxybutane|diepoxybutan + dimethylurea + dimethoxyethane|dimethoxyethan + dimethylacetamide|dimethyl-acetamide|dimethylacetamid|dimethyl-acetamid + carbonyldiimidazole|carbonyldiimidazol + formamide|formamid + mercaptopurine|mercaptopurin + methanamide|methanamid + phytantriol + trifluorothymidine|trifluorothymidin + trinitrotoluene + + glyoxalylamide|glyoxalylamid + + + carbanolate|carbanolat + chlorazine + chlorazin + chlorobenzilate|chlorobenzilat + benzocyclobutene|benzocyclobuten + diazinon|diazinone + diazolidinylurea|diazolidinyl urea + dichlorodiphenyltrichloroethane|dichlorodiphenyltrichloroethan + dimethylol ethylene urea + dithianone|dithianon + dihydromethanophenazine|dihydromethanophenazin + iodamide + imidazolidinylurea|imidazolidinyl urea + methanophenazine|methanophenazin + methoxychlor + methylazoxymethanol acetate|methylazoxymethanol acetat + methyldibromo 
glutaronitrile|methyldibromoglutaronitrile|methyldibromo glutaronitril|methyldibromoglutaronitril + oxolinic acid + oxolinate|oxolinat + oxybenzone|oxybenzon + pentazocine|pentazocin + pyridate|pyridat + pyrroloquinoline quinone|pyrroloquinoline quinon + sulfosalicylic acid + stibogluconate|stibogluconat + tetrabromogallate|tetrabromogallat + tetrachlorogallate|tetrachlorogallat + tetrafluorogallate|tetrafluorogallat + tetraiodogallate|tetraiodogallat + toluene-diisocyanate|toluene diisocyanate|toluene-diisocyanat|toluene diisocyanat + + + + biotin|biotine|d-biotin|d-biotine + biotin sulfone|biotine sulfone|d-biotin sulfone|d-biotine sulfone|biotin sulfon|d-biotin sulfon + biotin sulfoxide|biotine sulfoxide|d-biotin sulfoxide|d-biotine sulfoxide|biotin sulfoxid|d-biotin sulfoxid + choline|cholin + chlorocholine|chlorocholin + eicosasphinganine|eicosasphinganin + ethanolamine|ethanolamin + fluorocholine|fluorocholin + fluorouracil + propylthiouracil + glycerone|glyceron + glycocyamine|glycocyamin + guanidinium + icosasphinganine|icosasphinganin + leucinic acid|dl-leucinic acid + d-leucinic acid + l-leucinic acid + phytosphingosine|phytosphingosin + sphinganine|sphinganin + sphingosine|sphingosin + triethylcholine|triethylcholin + vitamin c + coenzyme a|coa + + + + + hydrate|hydrat + hbr|2hbr|3hbr|4hbr + hcl|2hcl|3hcl|4hcl + tfa|2tfa|3tfa|4tfa + + + + + amine|amin + aminium + aminide|aminid + barbiturate|barbiturat + boronic pinacol ester|boronic acid pinacol ester|boronicacid pinacol ester|boronicacidpinacol ester|boronicacid pinacolester|boronicacidpinacolester|boronic acidpinacol ester|boronic acidpinacolester|boronic acid pinacolester + carboxamide|carboxamid + carboxylate|carboxylat + carboxylic|carboxylicacid|carboxylic acid + diazonium + nitrone + paraben + perselenurane|perselenuran + persulfurane|persulfuran + selenurane|selenuran + sulfoximide|sulfoximine|sulfoximid|sulfoximin + sulfoxonium + sulfurane|sulfuran + + + + nitrolic acid|nitrolicacid + + diff 
-Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleSubstituents.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleSubstituents.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleSubstituents.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/simpleSubstituents.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,533 +1,520 @@ - - - - - 1-formazano - 1-isoureido - 1-isothioureido - 3-isoureido - 3-isothioureido - 5-formazano - abietamido - acetoxyl - acetamino - acetenyl - aci-nitro - acryl - active-amyl - activeamyl - amidoxalyl - amidyl - amidylidene - aminoxyl - aminyl - aminylidene - aminylidyne - amoxy - amyl - amylidene - anisal - asaryl - azidyl - isoamoxy - isoamyl - isoamylidene - neoamyl - t-amyl - tert-amyl - tertamyl - anilino - anisidino - benzal - benzidino - 4,4'-benzidino - beta-allyl - boc - borono - brosyl - butoxyl - cacodyl - carbamimidamido - carbazono - carbodiazono - carbonohydrazido - carbohydrazido - carbazido - carbmethoxy - carbomethoxy - carbethoxy - carboethoxy - carbopropoxy - carbpropoxy - carbobutoxy - carbbutoxy - carbopentoxy - carbpentoxy - carbohexoxy - carbhexoxy - carboheptoxy - carbheptoxy - carbooctoxy - carboctoxy - carbononoxy - carbnonoxy - carbodecoxy - carbdecoxy - carboundecoxy - carbundecoxy - carbododecoxy - carbdodecoxy - carbotridecoxy - carbtridecoxy - carbotetradecoxy - carbtetradecoxy - carbopentadecoxy - carbpentadecoxy - carbobenzyloxy - cbz - carbyne - carbynium - carvacryl - cetyl - cinnamal - cresyl - crotyl - cuminyl - cuminylidene - cumal - cumyl - alpha-cumyl - cyanamido - cyanyl - dabsyl - dansyl - desyl - deuterio - deutero - diazonio - duryl - ethoxyl - ethylenimino - eugenyl - formimino - - hydantoyl - hydrazino - hydrazono - hydrido - hydrogen - hydroseleno - hydroseleninyl - hydroselenonyl - hydrosulfinyl - hydrosulfonyl - 
hydrotelluro - hydrotellurinyl - hydrotelluronyl - isobutoxyl - isocrotyl - isocarbonohydrazido - isocyanyl - isoeugenyl - isopentenyl - isosemicarbazido - mesoxalo - mesyl - methallyl - methoxyl - methylol - morpholino - oxalaceto - hydroxamino - hydroximino - isopropoxyl - linalyl - neophyl - nerolidyl - nitramido - nitramino - nitroxy - nitrosyl - nitryl - nosyl - oximino - perselenuranyl - perselenuranylidene - persulfuranyl - persulfuranylidene - phenetidino - phenetyl - phenoxy - phenoxyl - picryl - pinacolyl - piperazino - piperidino - prenyl - propargyl - propoxyl - as-pseudocumyl - v-pseudocumyl - s-pseudocumyl - pyrrolidino - salicylal - sec-isoamyl - secisoamyl - sec-butoxyl - secbutoxyl - seleneno - selenino - selenono - selenyl - selenuranyl - selenuraniumyl - selenuranylidene - semicarbazido - siamyl - sulfino - sulfo - sulfoxy - sulfuranyl - sulfuraniumyl - sulfuranylidene - tellureno - tellurino - tellurono - telluryl - tert-butoxyl - tertbutoxyl - t-butoxyl - tbutoxyl - then-2-yl - then-2-ylidene - then-2-ylidyne - then-2-oyl - then-3-yl - then-3-ylidene - then-3-ylidyne - then-3-oyl - thexyl - thiyl - thymyl - toluidino - tosyl - trifloxy - tritio - trityl - ureido - vanillal - veratral - o-veratryl - xenyl - xylidino - - - - - - - amino - ammonio - phosphonio - arsonio - stibonio - bismuthonio - oxonio - sulfonio - selenonio - telluronio - fluoronio - chloronio - bromonio - iodonio - - diazo - azinoyl - azono - nitro - - fluorosyl - bromosyl - chlorosyl - chloroso - iodosyl - iodoso - - fluoryl - bromyl - chloryl - chloroxy - iodyl - iodoxy - - perfluoryl - perbromyl - perchloryl - periodyl - - fulminato - selenocyano - tellurocyano - thiocyano - isoselenocyano - isotellurocyano - isothiocyano - hydroxy - hydroxyl - hydroperoxy - hydroperoxyl - hydrogenperoxyl - perhydroxyl - oxyl - peroxyl - dioxyl - - oxo - keto - mercapto - sulfhydryl - thioxo - thiono - selenoxo - telluroxo - - phosphinimyl - phosphoroso - phosphino - phosphinyl - 
phosphinylidene - phosphinothioylidene - phosphinidene - - arsono - arsonato - arso - arsinimyl - arsoroso - arsenoso - arsino - arsinyl - arsinylidene - arsinothioylidene - arsinidene - - stibono - stibonato - stibo - stiboso - stibino - stibylene - - bismuthino - bismuthylene - - sulfonylidene - sulfonato - sulfinato - sulfeno - - nitroso - carboxy - carboxylato - amidino - guanyl - - oxalo - methoxalyl - ethoxalyl - - fmoc - tms - tbdms - - actinio - aluminio - americio - antimonio - argonio - arsenio - astatio - bario - berkelio - beryllio - bismuthio - borio - bromio - cadmio - caesio - calcio - californio - cerio - chlorio - chromio - cobaltio - cuprio - curio - dysprosio - einsteinio - erbio - europio - fermio - fluorio - francio - gadolinio - gallio - germanio - aurio - hafnio - helio - holmio - indio - iodio - iridio - ferrio - kryptonio - lanthanio - lawrencio - plumbio - lithio - lutetio - magnesio - manganio - mendelevio - mercurio - molybdenio - neodymio - neonio - neptunio - nickelio - niobio - nobelio - osmio - palladio - phosphorio - platinio - plutonio - polonio - potassio - kalio - praseodymio - promethio - protactinio - - radonio - rhenio - rhodio - rubidio - ruthenio - samario - scandio - selenio - silicio - argentio - sodio - natrio - strontio - sulfurio - tantalio - technetio - tellurio - terbio - thallio - thorio - thulio - stannio - titanio - tungstenio - wolframio - uranio - vanadio - xenonio - ytterbio - yttrio - zincio - zirconio - - - - diphospho - phosphono - phosphonato - phospho - sn-glycero-3-phospho - triphospho - - - - guanidino - tauryl - - 5'-adenylyl - 5'-thymidylyl - 5'-guanylyl - 5'-inosinylyl - 5'-xanthylyl - 5'-cytidylyl - 5'-uridylyl - 5'-orotidylyl - - 5'-adenosyl - 5'-deoxy-5'-adenosyl - adenosyl - 5'-thymidyl - 5'-guanosyl - 5'-inosyl - 5'-xanthosyl - 5'-cytidyl - 5'-uridyl - - - - - adenylyl - thymidylyl - guanylyl - inosinylyl - xanthylyl - cytidylyl - uridylyl - orotidylyl - - - - - azido - bromo - chloro - cyanato - 
cyano - fluoro - iodo - isocyanato - isocyano - isoselenocyanato - isotellurocyanato - isothiocyanato - - - - - amido - hydrazido - imido - nitrido - - - - - oxido - sulfido - selenido - tellurido - - - - perfluoro - perbromo - perchloro - periodo - perdeuterio - perdeutero - pertritio - - \ No newline at end of file + + + + + 1-formazano + 1-isoureido + 1-isothioureido + 3-isoureido + 3-isothioureido + 5-formazano + abietamido + acetoxyl + acetamino + acetenyl + aci-nitro + acroyl + acryl + active amyl|active-amyl|activeamyl + amidoxalyl + amidyl + amidylidene|amidyliden + aminoxy|aminoxyl + aminyl + aminylidene|aminyliden + aminylidyne|aminylidyn + amoxy + amyl|normalamyl|normal amyl|normal-amyl + amylidene|amyliden + anisal + asaryl + azidyl + iso-amoxy|isoamoxy + iso-amyl|isoamyl + iso-amylidene|isoamylidene|iso-amyliden|isoamyliden + neo-amyl|neoamyl + secamyl|sec.amyl|sec-amyl|sec.-amyl|secondaryamyl|secondary amyl|secondary-amyl + tertiaryamyl|tertiary amyl|tertiary-amyl|tert-amyl|tertamyl|tert.-amyl|tert.amyl|t-amyl + anilino + anisidino + benzal|benzylene + benzidino + 4,4'-benzidino + beta-allyl + biphen-2-yl + biphen-3-yl + biphen-4-yl + boc + borono + brosyl + butoxyl + cacodyl + carbamimidamido + carbazono + carbodiazono + carbonohydrazido|carbohydrazido|carbazido + carbmethoxy + carbomethoxy + carbethoxy|carboethoxy + carbopropoxy|carbpropoxy + carbobutoxy|carbbutoxy + carbopentoxy|carbpentoxy + carbohexoxy|carbhexoxy + carboheptoxy|carbheptoxy + carbooctoxy|carboctoxy + carbononoxy|carbnonoxy + carbodecoxy|carbdecoxy + carboundecoxy|carbundecoxy + carbododecoxy|carbdodecoxy + carbotridecoxy|carbtridecoxy + carbotetradecoxy|carbtetradecoxy + carbopentadecoxy|carbpentadecoxy + carballoxy|carboalloxy|carballyloxy|carboallyloxy + carbbenzoxy|carbobenzoxy|carbbenzyloxy|carbobenzyloxy + carbisopropoxy|carboisopropoxy|carb-i-propoxy|carbo-i-propoxy + carbphenoxy|carbophenoxy|carbphenyloxy|carbophenyloxy + carbo-tertiarybutoxy + 
carbo-tertiary-butoxy|carbo-tertbutoxy|carbo-tert.butoxy|carbo-tert-butoxy|carbo-tert.-butoxy|carbo-t-butoxy|carb-tertiarybutoxy|carb-tertiary-butoxy|carb-tertbutoxy|carb-tert.butoxy|carb-tert-butoxy|carb-tert.-butoxy|carb-t-butoxy|carbotertiarybutoxy|carbotertiary-butoxy|carbotertbutoxy|carbotert.butoxy|carbotert-butoxy|carbotert.-butoxy|carbot-butoxy|carbtertiarybutoxy|carbtertiary-butoxy|carbtertbutoxy|carbtert.butoxy|carbtert-butoxy|carbtert.-butoxy|carbt-butoxy + cbz + carbyne + carbynium + carvacryl + cetyl + cinnamal + cresyl + crotyl + cuminyl + cuminylidene|cuminyliden|cumal + cumoyl + cumyl|alpha-cumyl + cyanamido + cyanyl + cyclopentadienyl + dabsyl + dansyl + desyl + deuterio|deutero + diazonio + duryl + ethoxyl + ethylenimino + eugenyl + formimino + + homoallyl + homomorpholino + hydantoyl + hydrazino + hydrazono + hydrazyl + hydrido + hydrogen + hydroseleno + hydroseleninyl + hydroselenonyl + hydrosulfinyl + hydrosulfonyl + hydrotelluro + hydrotellurinyl + hydrotelluronyl + isobutoxyl|iso-butoxyl + isocrotyl + isocarbonohydrazido + isocyanyl + isoeugenyl + isopentenyl + isosemicarbazido + isoureido + isothioureido + mesoxalo + mesyl + methallyl + methoxyl + methylol + morpholino + oxalaceto + hydroxamino + hydroximino + isopropoxyl + linalyl + methacroyl + methacryl + neophyl + nerolidyl + nitramido + nitramino + nitroxy + nitrosyl + nitryl + nosyl + oximino + perselenuranyl + perselenuranylidene|perselenuranyliden + persulfuranyl + persulfuranylidene|persulfuranyliden + phenetidino + phenetyl + phenoxy|phenoxyl + picryl + pinacolyl + piperazino + piperidino + prenyl + propargyl + propoxyl + as-pseudocumyl + v-pseudocumyl + s-pseudocumyl + pyrrolidino + salicylal + sec-iso-amyl|sec.-iso-amyl|sec-isoamyl|sec.-isoamyl|secisoamyl + sec-butoxyl|sec.-butoxyl|secbutoxyl + seleneno + selenino + selenono + selenyl + selenuranyl + selenuraniumyl + selenuranylidene|selenuranyliden + semicarbazido + siamyl + sulfino + sulfo + sulfoxy + sulfuranyl + sulfuraniumyl 
+ sulfuranylidene|sulfuranyliden + tellureno + tellurino + tellurono + telluryl + tertiary-butoxyl|tertiarybutoxyl|tert-butoxyl|tertbutoxyl|tert.-butoxyl|tert.butoxyl|t-butoxyl|tbutoxyl + then-2-yl + then-2-ylidene|then-2-yliden + then-2-ylidyne|then-2-ylidyn + then-2-oyl + then-3-yl + then-3-ylidene|then-3-yliden + then-3-ylidyne|then-3-ylidyn + then-3-oyl + thexyl + thiyl + thymyl + toluidino + tosyl + trifloxy + triflyl + tritio + trityl + ureido + vanillal + veratral + o-veratryl + xenyl + xylidino + + + + + + + amino + ammonio + phosphonio + arsonio + stibonio + bismuthonio + oxonio + sulfonio + selenonio + telluronio + fluoronio + chloronio + bromonio + iodonio + + diazo + azinoyl + azono + nitro + + fluorosyl + bromosyl + chlorosyl + chloroso + iodosyl + iodoso + + fluoryl + bromyl + chloryl + chloroxy + iodyl + iodoxy + + perfluoryl + perbromyl + perchloryl + periodyl + + fulminato + selenocyano + tellurocyano + thiocyano + isoselenocyano + isotellurocyano + isothiocyano + hydroxy|hydroxyl + hydroperoxy|hydroperoxyl|hydrogenperoxyl|perhydroxyl + oxyl + peroxyl|dioxyl + + oxo|keto + mercapto|sulfhydryl|sulfydryl + thioxo + thiono + selenoxo + telluroxo + + phosphinimyl + phosphoroso + phosphino + phosphinyl + phosphinylidene|phosphinyliden + phosphinothioylidene|phosphinothioyliden + phosphinidene|phosphiniden + + arsono + arsonato + arso + arsinimyl + arsoroso|arsenoso + arsino + arsinyl + arsinylidene|arsinyliden + arsinothioylidene|arsinothioyliden + arsinidene|arsiniden + + stibono + stibonato + stibo + stiboso + stibino + stibylene|stibylen + + bismuthino + bismuthylene|bismuthylen + + sulfonylidene|sulfonyliden + sulfonato + sulfinato + sulfeno + + nitroso + carboxy|carboxyl + carboxylato + amidino|guanyl + + oxalo + methoxalyl + ethoxalyl + + fmoc + tms + tbdms + tbdps + + 
t-butyl(dimethyl)silanoxy|t-butyl(dimethyl)siloxy|t-butyl-dimethylsilanoxy|t-butyl-dimethylsiloxy|t-butyldimethylsilanoxy|t-butyldimethylsiloxy|tert-butyl(dimethyl)silanoxy|tert-butyl(dimethyl)siloxy|tert-butyl-dimethylsilanoxy|tert-butyl-dimethylsiloxy|tert-butyldimethylsilanoxy|tert-butyldimethylsiloxy + t-butyl(dimethyl)silanyl|t-butyl(dimethyl)silyl|t-butyl-dimethylsilanyl|t-butyl-dimethylsilyl|t-butyldimethylsilanyl|t-butyldimethylsilyl|tert-butyl(dimethyl)silanyl|tert-butyl(dimethyl)silyl|tert-butyl-dimethylsilanyl|tert-butyl-dimethylsilyl|tert-butyldimethylsilanyl|tert-butyldimethylsilyl + t-butyl(diphenyl)silanoxy|t-butyl(diphenyl)siloxy|t-butyl-diphenylsilanoxy|t-butyl-diphenylsiloxy|t-butyldiphenylsilanoxy|t-butyldiphenylsiloxy|tert-butyl(diphenyl)silanoxy|tert-butyl(diphenyl)siloxy|tert-butyl-diphenylsilanoxy|tert-butyl-diphenylsiloxy|tert-butyldiphenylsilanoxy|tert-butyldiphenylsiloxy + t-butyl(diphenyl)silanyl|t-butyl(diphenyl)silyl|t-butyl-diphenylsilanyl|t-butyl-diphenylsilyl|t-butyldiphenylsilanyl|t-butyldiphenylsilyl|tert-butyl(diphenyl)silanyl|tert-butyl(diphenyl)silyl|tert-butyl-diphenylsilanyl|tert-butyl-diphenylsilyl|tert-butyldiphenylsilanyl|tert-butyldiphenylsilyl + + actinio + aluminio + americio + antimonio + argonio + arsenio + astatio + bario + berkelio + beryllio + bismuthio + borio + bromio + cadmio + caesio + calcio + californio + cerio + chlorio + chromio + cobaltio + cuprio + curio + dysprosio + einsteinio + erbio + europio + fermio + fluorio + francio + gadolinio + gallio + germanio + aurio + hafnio + helio + holmio + indio + iodio + iridio + ferrio + kryptonio + lanthanio + lawrencio + plumbio + lithio + lutetio + magnesio + manganio + mendelevio + mercurio + molybdenio + neodymio + neonio + neptunio + nickelio + niobio + nobelio + osmio + palladio + phosphorio + platinio + plutonio + polonio + potassio|kalio + praseodymio + promethio + protactinio + + radonio + rhenio + rhodio + rubidio + ruthenio + samario + scandio + selenio + 
silicio + argentio + sodio|natrio + strontio + sulfurio + tantalio + technetio + tellurio + terbio + thallio + thorio + thulio + stannio + titanio + tungstenio|wolframio + uranio + vanadio + xenonio + ytterbio + yttrio + zincio + zirconio + + + + diphospho + phosphono + phosphonato + phospho + sn-glycero-3-phospho + triphospho + + + + guanidino + tauryl + + 5'-adenylyl + 5'-thymidylyl + 5'-guanylyl + 5'-inosinylyl + 5'-xanthylyl + 5'-cytidylyl + 5'-uridylyl + 5'-orotidylyl + 5'-pseudouridylyl + + 5'-adenosyl|5'-deoxy-5'-adenosyl|adenosyl + 5'-thymidyl + 5'-guanosyl + 5'-inosyl + 5'-xanthosyl + 5'-cytidyl + 5'-uridyl + 5'-orotidyl + 5'-pseudouridyl + + + + + adenylyl + thymidylyl + guanylyl + inosinylyl + xanthylyl + cytidylyl + uridylyl + orotidylyl + pseudouridylyl + + + + + azido + bromo + chloro + cyanato + cyano + fluoro + iodo + isocyanato + isocyano + isoselenocyanato + isotellurocyanato + isothiocyanato + + + + + amido + hydrazido + imido + nitrido + + + + + oxido + sulfido + selenido + tellurido + + + + perfluoro + perbromo + perchloro + periodo + perdeuterio|perdeutero + pertritio + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/substituents.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/substituents.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/substituents.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/substituents.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,18 +1,16 @@ - - - - bor - sil - germ - stann - plumb - - homopiperon - phyt - phenac - piperon - salic - all - allan - vin - \ No newline at end of file + + + bor + sil + germ + stann + plumb + + homopiperon + phyt + phenac + piperon + salic + all|allan + vin + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixApplicability.xml 
opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixApplicability.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixApplicability.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixApplicability.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,281 +1,306 @@ - - - - carbonyl - ide - ium - oxy - oyl - selenenyl - seleninyl - selenonyl - sulfenyl - sulfinyl - sulfonyl - tellurenyl - tellurinyl - telluronyl - uide - yl - ylidene - ylidyne - ylium - + + carbonyl + carboximidoyl + ide + ium + oximino + oxy + oyl + selenenyl + seleninyl + selenonyl + sulfenyl + sulfinyl + sulfonyl + tellurenyl + tellurinyl + telluronyl + uide + yl + ylidene + ylidyne + ylium + - - acylium - aldehyde - aldoxime - amide - amidylium - amidine - amidinium - amidium - amido - anilide - ate - hydrazide - hydrazido - hydroxamate - hydroxamic - ic - ide - cyclicimide - cyclicimidium - cyclicimido - cyclicimidylium - ium - ite - lactam - lactim - lactone - nitrile - nitrilium - onaphthone - - ophenone - ous - oxy-ylForAcyl - oxy - oyl - sultam - sultim - sultine - sultone - uide - oyl - yl - ylidene - ylidyne - oxoAndDiYl - oxoAndTriYl - ylium - + + acylium + aldehyde + aldoxime + amide + amidrazone + amidylium + amidine + amidinium + amidium + amido + anilide + ate + hydrazide + hydrazido + hydroxamate + hydroxamic + ic + ic_O_acid + ic_S_acid + ic_Se_acid + ic_Te_acid + ide + cyclicimide + cyclicimidium + cyclicimido + cyclicimidylium + ium + lactam + lactim + lactone + nitrile + nitrilium + nitrolic acid + onaphthone + + ophenone + oximino-ylForAcyl + oximino + oxy-ylForAcyl + oxy + oyl + sultam + sultim + sultine + sultone + uide + oyl + yl + ylidene + ylidyne + oxoAndDiYl + oxoAndTriYl + ylium + - - aldehyde - aldehyde - aldehyde - aldoxime - amide - amidylium - amidium - amido - anilide - ate - hydrazide - hydrazide - hydrazido - hydrazido - 
hydroxamate - hydroxamate - hydroxamic - hydroxamic - ic - cyclicimide - cyclicimidium - cyclicimido - cyclicimidylium - ium - ol - ol - oyl - oyl - yl - yl - ylidene - + + aldehyde + aldehyde + aldehyde + aldoxime + amide + amidrazone + amidylium + amidium + amido + anilide + ate + hydrazide + hydrazide + hydrazido + hydrazido + hydroxamate + hydroxamate + hydroxamic + hydroxamic + ic + ic_O_acid + ic_S_acid + ic_Se_acid + ic_Te_acid + cyclicimide + cyclicimidium + cyclicimido + cyclicimidylium + ium + ol + ol + olate + olate + oyl + oyl + yl + yl + ylidene + - + - carbonyl_to_hydroxy - carbonyl_to_hydroxy - onamide_aldehyde - onamide_aldehyde - ous - ous - ite - ite - ononitrile_aldehyde - ononitrile_aldehyde - yl - yl + carbonyl_to_hydroxy + carbonyl_to_hydroxy + onamide_aldehyde + onamide_aldehyde + ous + ous + ite + ite + ononitrile_aldehyde + ononitrile_aldehyde + yl + yl - hydroxy_to_amide - hydroxy_to_ate - hydroxy_to_icacid - hydroxy_to_nitrile - hydroxy_to_carbonyl - ic_nonCarboxylic - hydroxy_to_amide - hydroxy_to_ate - hydroxy_to_icacid - hydroxy_to_nitrile - hydroxy_to_acyl - yl_carbohydrate - yl - ylidene - + lactone + - - ylium - amine - aminylium - amino - ite - ous - diyl - diyl - ylidene - + hydroxy_to_amide + hydroxy_to_ate + hydroxy_to_icacid + hydroxy_to_nitrile + hydroxy_to_carbonyl + ic_nonCarboxylic + hydroxy_to_amide + hydroxy_to_ate + hydroxy_to_icacid + hydroxy_to_nitrile + hydroxy_to_acyl + yl_carbohydrate + yl + ylidene + - - acylium_nonCarboxylic - ate_nonCarboxylic - ic_nonCarboxylic - ite_nonCarboxylic - ous_nonCarboxylic - oyl_nonCarboxylic - oyl_nonCarboxylic - + + ylium + amine + aminylium + aminoAndYl + ite + ous + diyl + diyl + ylidene + - - acylium - aldehyde - aldehyde - aldoxime - amide - amidylium - amidine - amidinium - amidium - amido - amine - aminide - aminium - amino - aminylium - anilide - arsonous - ate - azonic - arsonite - azonate - azonite - azonous - boronate - boronic - carbamate - carbamic - carbolactone - 
carbonyl - carbonylium - carboxylic - carboxylate - carboxylite - diazonium - dicarboximide - hydrazide - hydrazido - hydrazonic - hydroxamate - hydroxamic - ic - ide - imine - iminide - iminium - iminyl - iminylium - io - ium - ite - lactam - lactim - lactone - nitrile - nitrilium - ol - olate - lactone - yl - onaphthone - one - ophenone - ous - oxy - oyl - phosphonite - phosphonous - selenenic - selenenyl - seleninyl - selenonyl - selone - stibonite - stibonous - sulfamate - sulfamic - sulfenamide - sulfenamido - selenenate - sulfenate - sulfenic - sulfenoselenoate - sulfenoselenoic - sulfenoselenoyl - sulfenothioate - sulfenothioic - sulfenothioyl - sulfenyl - sulfinyl - sulfonyl - sultam - sultim - sultine - sultone - tellone - tellurenate - tellurenic - tellurenyl - tellurinyl - telluronyl - thione - uide - yl - ylidene - ylidyne - ylium - - \ No newline at end of file + + acylium_nonCarboxylic + ate_nonCarboxylic + ic_nonCarboxylic + ite_nonCarboxylic + ous_nonCarboxylic + oyl_nonCarboxylic + oyl_nonCarboxylic + + + + acylium + aldehyde + aldehyde + aldoxime + amide + amidrazone + amidylium + amidine + amidinium + amidium + amido + amine + aminide + aminium + amino + aminylium + anilide + arsonous + ate + azonic + arsonite + azonate + azonite + azonous + boronate + boronic + boronicacidpinacolester + carbamate + carbamic + carbolactone + carboximidoyl + carbonyl + carbonylium + carboxamide + carboxylic + carboxylate + carboxylite + diazonium + dicarboximide + hydrazide + hydrazido + hydrazonic + hydroxamate + hydroxamic + ic + ic_O_acid + ic_S_acid + ic_Se_acid + ic_Te_acid + ide + imine + iminide + iminium + iminyl + iminylium + io + ium + lactam + lactim + lactone + nitrile + nitrilium + nitrolic acid + ol + olate + lactone + yl + onaphthone + one + ophenone + oximino + oxy + oyl + phosphonite + phosphonous + selenenic + selenenyl + seleninyl + selenonyl + selone + stibonite + stibonous + sulfamate + sulfamic + sulfenamide + sulfenamido + selenenate + 
sulfenate + sulfenic + sulfenoselenoate + sulfenoselenoic + sulfenoselenoyl + sulfenothioate + sulfenothioic + sulfenothioyl + sulfenyl + sulfinyl + sulfonyl + sultam + sultim + sultine + sultone + tellone + tellurenate + tellurenic + tellurenyl + tellurinyl + telluronyl + thione + uide + yl + ylidene + ylidyne + ylium + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixes.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixes.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixes.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixes.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,208 +1,142 @@ - - - - - - - - amine - amin - aminide - aminid - aminium - aminylium - carbonylium - carboxylate - carboxylat - carboxylic - carboxylicacid - carboxylic acid - carboxylite - carboxylit - diazonium - imine - imin - iminide - iminid - iminium - iminylium - quinone - quinon - ol - olate - olat - one - on - selone - selon - selenenate - selenenat - selenenic - selenenicacid - selenenic acid - sulfenoselenoate - sulfenoselenoat - sulfenoselenoic - sulfenoselenoicacid - sulfenoselenoic acid - sulfenothioate - sulfenothioat - sulfenothioic - sulfenothioicacid - sulfenothioic acid - sulfenamide - sulfenamid - sulfenate - sulfenat - sulfenic - sulfenicacid - sulfenic acid - tellone - tellon - tellurenate - tellurenat - tellurenic - tellurenicacid - tellurenic acid - - - arsonite - arsonit - arsonous - arsonousacid - arsonous acid - azonic - azonicacid - azonic acid - azonate - azonat - azonite - azonit - azonous - azonousacid - azonous acid - boronate - boronat - boronic - boronicacid - boronic acid - carbamate - carbamat - carbamic - carbamicacid - carbamic acid - phosphonite - phosphonit - phosphonous - phosphonousacid - phosphonous acid - stibonite - stibonit - stibonous - stibonousacid - 
stibonous acid - sulfamate - sulfamat - sulfamic - sulfamicacid - sulfamic acid - - - - - al - aldehydic - aldehydicacid - aldehydic acid - aldoxime - aldoxim - amate - amat - amic - amicacid - amic acid - anilate - anilic - anilicacid - anilic acid - ite - it - onaphthone - onaphthon - naphthone - naphthon - ophenone - ophenon - phenone - phenon - ous - ousacid - ous acid - - - - - - aldehyde - aldehyd - amide - amid - amidylium - amidium - anilide - anilid - amidine - amidin - amidinium - ate - at - hydrazide - hydrazid - hydroxamic - hydroxamicacid - hydroxamic acid - hydroxamate - hydroxamat - ic - icacid - ic acid - nitrile - nitril - nitrilium - ylium - - - - - one - on - selone - selon - tellone - tellon - thione - thion - - - - - imide - imid - imidium - imidylium - - - - - carbolactone - dicarboximide - lactam - lactim - lactone - olide - sultam - sultim - sultine - sultone - - \ No newline at end of file + + + + + + + amine|amin + aminide|aminid + aminium + aminylium + carbonylium + carboxyamide|carboxyamid + carboxylate|carboxylat + carboxylic|carboxylicacid|carboxylic acid + carboxylite|carboxylit + diazonium + imine|imin + iminide|iminid + iminium + iminylium + quinone|quinon + ol + olate|olat + one|on + selone|selon + selenenate|selenenat + selenenic|selenenicacid|selenenic acid + sulfenoselenoate|sulfenoselenoat + sulfenoselenoic|sulfenoselenoicacid|sulfenoselenoic acid + sulfenothioate|sulfenothioat + sulfenothioic|sulfenothioicacid|sulfenothioic acid + sulfenamide|sulfenamid + sulfenate|sulfenat + sulfenic|sulfenicacid|sulfenic acid + tellone|tellon + tellurenate|tellurenat + tellurenic|tellurenicacid|tellurenic acid + + + arsonite|arsonit + arsonous|arsonousacid|arsonous acid + azonic|azonicacid|azonic acid + azonate|azonat + azonite|azonit + azonous|azonousacid|azonous acid + boronate|boronat + boronic|boronicacid|boronic acid + boronic pinacol ester|boronic acid pinacol ester|boronicacid pinacol ester|boronicacidpinacol ester|boronicacid 
pinacolester|boronicacidpinacolester|boronic acidpinacol ester|boronic acidpinacolester|boronic acid pinacolester + carbamate|carbamat + carbamic|carbamicacid|carbamic acid + phosphonite|phosphonit + phosphonous|phosphonousacid|phosphonous acid + stibonite|stibonit + stibonous|stibonousacid|stibonous acid + sulfamate|sulfamat + sulfamic|sulfamicacid|sulfamic acid + + + + + al + + + aldehydic|aldehydicacid|aldehydic acid + aldoxime|aldoxim + amate|amat + amic|amicacid|amic acid + anilate|anilat + anilic|anilicacid|anilic acid + ite|it + nitrolic acid|nitrolicacid + onaphthone|onaphthon|naphthone|naphthon + ophenone|ophenon|phenone|phenon + ous|ousacid|ous acid + + + + + + aldehyde|aldehyd + amide|amid + amidium + anilide|anilid + amidine|amidin + amidinium + amidrazone|amidrazon + amidylium + ate|at + hydrazide|hydrazid + hydroxamic|hydroxamicacid|hydroxamic acid + hydroxamate|hydroxamat + ic|icacid|ic acid + ic acid anion + ic o-acid + ic s-acid + ic se-acid + ic te-acid + nitrile|nitril + nitrilium + ylium + + + + + aldehyde|aldehyd + + + + + one|on + selone|selon + tellone|tellon + thione|thion + + + + ol + + + + + imide|ic imide|ic acid imide|imid|ic imid|ic acid imid + imidium|ic imidium|ic acid imidium + imidylium|ic imidylium|ic acid imidylium + + + + + carbolactone|carbolacton + dicarboximide|dicarboximid + lactam + lactim + lactone|lacton + olide|olid + sultam + sultim + sultine|sultin + sultone|sulton + + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixPrefix.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixPrefix.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixPrefix.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixPrefix.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,36 +1,23 @@ - - - - carb - carbo - carbox - carbono - - sulfon - sulfono - sulfin - sulfino 
- - selenon - selenono - selenin - selenino - - telluron - tellurono - tellurin - tellurino - - - - phosphon - phosphono - arson - arsono - stibon - stibono - \ No newline at end of file + + + + carb|carbo|carbox|carbono + + sulfon|sulfono + sulfin|sulfino + + selenon|selenono + selenin|selenino + + telluron|tellurono + tellurin|tellurino + + + phosphon|phosphono + arson|arsono + stibon|stibono + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixRules.dtd opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixRules.dtd --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixRules.dtd 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixRules.dtd 2017-07-23 20:55:18.000000000 +0000 @@ -1,15 +1,14 @@ - - + + + ketoneLocant (yes) #IMPLIED> @@ -23,3 +22,5 @@ + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixRules.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixRules.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixRules.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/suffixRules.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,456 +1,503 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenFiles.dtd opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenFiles.dtd --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenFiles.dtd 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenFiles.dtd 2017-07-23 20:55:18.000000000 +0000 @@ -1,2 +1,2 @@ - \ No newline at end of file + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenList.dtd 
opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenList.dtd --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenList.dtd 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenList.dtd 2017-07-23 20:55:18.000000000 +0000 @@ -1,31 +1,32 @@ - \ No newline at end of file + usableAsAJoiner CDATA #IMPLIED +> \ No newline at end of file diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenLists.dtd opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenLists.dtd --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenLists.dtd 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/tokenLists.dtd 2017-07-23 20:55:18.000000000 +0000 @@ -3,9 +3,9 @@ - \ No newline at end of file + alphaBetaClockWiseAtomOrdering == Listed in a clockwise order, the locants of the atoms that define a pseudo 2D plane for alpha/beta stereochemistry. + commonOxidationStatesAndMax == For elements, the typical oxidation states (comma separated) then a colon and the maximum oxidation station. e.g. for iron "2,3:6" as 2 and 3 are typical. 6 is possible but atypical. + defaultInID == The ID of the atom which by default an incoming fragment should connect to. ID is relative to this particular fragment (first atom =1) + defaultInLocant == The locant of the atom which by default an incoming fragment should connect to. Typically used on groups + frontLocantsExpected == Comma separated list of locants indicating what locants are expected in front of this group. If one of these locants is found it will preferentially be assigned to the suffix of the group. The group should correspond to a retained name. (codified in P-29.6 of IUPAC 2004 rules) + functionalIDs == Comma separated IDs of locations which are functionalIDs on a group. 
IDs are relative to this particular fragment (first atom =1). Functional IDs are used in ester formation. + fusedRingNumbering == The numbering to use for a ring when used in a fused ring system. Same syntax as labels attribute. It is only necessary where the ring has non-standard numbering that does not proceed regularly around the edge of the fused ring system c.f. purine + homology == Semi-colon delimited list of labels for * atoms, used for when * atoms represent generic groups (homology groups) e.g. Alkyl + iminoLike == yes or absent. If present indicates that the substituent can either be -X- or X= or even -X=. It changes some behaviour to encourage substitutive bonding rather than additive bonds where this substituent is involved. This attribute is mostly ignored when doing polymers for which imino is always -N- + labels == Slash delimited list of locants. List must be the same length as number of atoms. Multiple locants can be given to an atom by comma delimiting them + locant == Used for an/ane to prevent a locant being given + naturalEntIsOpposite == yes or absent. Indicates that this trivial name has the opposite D/L stereochemistry to others in its class i.e. L- for carbohydrates or D- for amino acids + outIDs == Comma separated IDs of locations of radicals on a substituent. Typically used on substituents to create multivalent substituents. IDs are relative to this particular fragment (first atom = 1) + suffixAppliesTo == comma separated IDs indicating where the suffix following a group should be applied. Currently used to direct the formation of trivial di-acids. IDs are relative to this particular fragment first atom = 1) + suffixAppliesToByDefault == same as suffixAppliesTo but can be overridden by given locants + usableAsAJoiner == yes or absent. Can the substituent be implicitly bracketed to a previous substitutent e.g. 
methylaminobenzene becomes (methylamino)benzene as amino has this attribute + --> diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/unsaturators.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/unsaturators.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/unsaturators.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/unsaturators.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,15 +1,13 @@ - - - - - en - ene - yne - yn - - - - ane - an - - + + + + + ene|en + yne|yn + + + + + ane|an + + diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/wordRules.dtd opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/wordRules.dtd --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/wordRules.dtd 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/wordRules.dtd 2017-07-23 20:55:18.000000000 +0000 @@ -3,17 +3,14 @@ - - diff -Nru opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/wordRules.xml opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/wordRules.xml --- opsin-1.5.0/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/wordRules.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/main/resources/uk/ac/cam/ch/wwmm/opsin/resources/wordRules.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,354 +1,409 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/AmbiguityDetectionTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/AmbiguityDetectionTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/AmbiguityDetectionTest.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/AmbiguityDetectionTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,54 @@ +package uk.ac.cam.ch.wwmm.opsin; + +import static org.junit.Assert.assertEquals; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; + +import org.apache.commons.io.IOUtils; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class AmbiguityDetectionTest { + + private static NameToStructure n2s; + + @BeforeClass + public 
static void setUp() { + n2s = NameToStructure.getInstance(); + } + + @AfterClass + public static void cleanUp(){ + n2s = null; + } + + @Test + public void testNamesThatShouldBeDetectedAsAmbiguous() throws IOException{ + checkNames("ambiguous.txt", true); + } + + @Test + public void testUnAmbiguousCounterExamples() throws IOException{ + checkNames("unambiguous.txt", false); + } + + private void checkNames(String file, boolean isAmbiguous) throws IOException{ + BufferedReader input = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(file))); + try { + String line = null; + while ((line = input.readLine()) != null) { + if(line.startsWith("//")){ + continue; + } + OpsinResult result = n2s.parseChemicalName(line); + assertEquals(line + " gave unexpected result", isAmbiguous, result.nameAppearsToBeAmbiguous()); + } + } finally { + IOUtils.closeQuietly(input); + } + } + +} diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/AtomTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/AtomTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/AtomTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/AtomTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,10 +1,7 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertFalse; -import static junit.framework.Assert.assertNotNull; -import static junit.framework.Assert.assertTrue; -import nu.xom.Element; +import static org.junit.Assert.*; +import static org.mockito.Mockito.mock; import org.junit.Before; import org.junit.Test; @@ -13,33 +10,24 @@ public class AtomTest { private Fragment frag; - private FragmentManager fm = new FragmentManager(new SMILESFragmentBuilder(), new IDManager()); + private SMILESFragmentBuilder sBuilder = new SMILESFragmentBuilder(new IDManager()); @Before public void setUp() { - frag = new 
Fragment(); + frag = new Fragment(mock(Element.class)); } @Test public void testAtom() { - Atom atom = new Atom(10, "C", frag); + Atom atom = new Atom(10, ChemEl.C, frag); assertNotNull("Got atom", atom); assertEquals("Id = 10", 10, atom.getID()); - assertEquals("Element = C", "C", atom.getElement()); - } - - @Test - public void testToCMLAtom() { - Atom atom = new Atom(10, "C", frag); - atom.addLocant("1"); - Element elem = atom.toCMLAtom(); - assertNotNull("Got XOM Element", elem); - assertEquals("Correct XML", "", elem.toXML()); + assertEquals("Element = C", ChemEl.C, atom.getElement()); } @Test public void testAddLocantHasLocant() { - Atom atom = new Atom(10, "C", frag); + Atom atom = new Atom(10, ChemEl.C, frag); atom.addLocant("1"); assertTrue("Atom has locant '1'", atom.hasLocant("1")); assertFalse("Atom has no locant 'C'", atom.hasLocant("C")); @@ -49,19 +37,12 @@ @Test public void testGetIncomingValency() throws StructureBuildingException { - SMILESFragmentBuilder sBuilder = new SMILESFragmentBuilder(); - assertEquals("No bonds", 0, - sBuilder.build("C", fm).getAtomList().get(0).getIncomingValency()); - assertEquals("One bond", 1, - sBuilder.build("CC", fm).getAtomList().get(0).getIncomingValency()); - assertEquals("Two bonds", 2, - sBuilder.build("C(C)C", fm).getAtomList().get(0).getIncomingValency()); - assertEquals("Double bond", 2, - sBuilder.build("C=O", fm).getAtomList().get(0).getIncomingValency()); - assertEquals("Triple bond", 3, - sBuilder.build("C#C", fm).getAtomList().get(0).getIncomingValency()); - assertEquals("One bond", 1, - sBuilder.build("CC=CC#N", fm).getAtomList().get(0).getIncomingValency()); + assertEquals("No bonds", 0, sBuilder.build("C").getFirstAtom().getIncomingValency()); + assertEquals("One bond", 1, sBuilder.build("CC").getFirstAtom().getIncomingValency()); + assertEquals("Two bonds", 2, sBuilder.build("C(C)C").getFirstAtom().getIncomingValency()); + assertEquals("Double bond", 2, 
sBuilder.build("C=O").getFirstAtom().getIncomingValency()); + assertEquals("Triple bond", 3, sBuilder.build("C#C").getFirstAtom().getIncomingValency()); + assertEquals("One bond", 1, sBuilder.build("CC=CC#N").getFirstAtom().getIncomingValency()); } } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/BondTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/BondTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/BondTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/BondTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,19 +1,20 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertNotNull; -import nu.xom.Element; +import static org.junit.Assert.*; +import static org.mockito.Mockito.mock; import org.junit.Test; +import uk.ac.cam.ch.wwmm.opsin.Bond.SMILES_BOND_DIRECTION; +import uk.ac.cam.ch.wwmm.opsin.BondStereo.BondStereoValue; public class BondTest { @Test public void testBond() { - Fragment frag = new Fragment(); - Atom a1 = new Atom(1, "C", frag); - Atom a2 = new Atom(2, "C", frag); + Fragment frag = new Fragment(mock(Element.class)); + Atom a1 = new Atom(1, ChemEl.C, frag); + Atom a2 = new Atom(2, ChemEl.C, frag); frag.addAtom(a1); frag.addAtom(a2); Bond bond = new Bond(a1, a2, 1); @@ -21,19 +22,32 @@ assertEquals("From = 1", 1, bond.getFrom()); assertEquals("To = 2", 2, bond.getTo()); assertEquals("Order = 1", 1, bond.getOrder()); + assertEquals(a1, bond.getFromAtom()); + assertEquals(a2, bond.getToAtom()); + assertEquals(a2, bond.getOtherAtom(a1)); + assertEquals(a1, bond.getOtherAtom(a2)); + assertEquals(null, bond.getBondStereo()); + assertEquals(null, bond.getSmilesStereochemistry()); } - + @Test - public void testToCMLBond() { - Fragment frag = new Fragment(); - Atom a1 = new Atom(1, "C", frag); - Atom a2 = new Atom(2, "C", frag); + public void 
testBondMutation() { + Fragment frag = new Fragment(mock(Element.class)); + Atom a1 = new Atom(1, ChemEl.C, frag); + Atom a2 = new Atom(2, ChemEl.C, frag); + Atom a3 = new Atom(3, ChemEl.C, frag); + Atom a4 = new Atom(4, ChemEl.C, frag); frag.addAtom(a1); frag.addAtom(a2); - Bond bond = new Bond(a1, a2, 1); - Element elem = bond.toCMLBond(); - assertNotNull("Got XOM Element", elem); - assertEquals("Correct XML", "", elem.toXML()); + frag.addAtom(a3); + frag.addAtom(a4); + Bond bond = new Bond(a2, a3, 1); + bond.setOrder(2); + assertEquals("Order = 2", 2, bond.getOrder()); + BondStereo bondStereo = new BondStereo(new Atom[]{a1,a2,a3,a4}, BondStereoValue.TRANS); + bond.setBondStereo(bondStereo); + assertEquals(bondStereo, bond.getBondStereo()); + bond.setSmilesStereochemistry(SMILES_BOND_DIRECTION.LSLASH); + assertEquals(SMILES_BOND_DIRECTION.LSLASH, bond.getSmilesStereochemistry()); } - } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/CASToolsTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/CASToolsTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/CASToolsTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/CASToolsTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,6 +1,6 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.assertEquals; +import static org.junit.Assert.*; import java.io.IOException; @@ -94,7 +94,7 @@ @Test public void cas12() throws ParsingException{ String name = CASTools.uninvertCASName("Ethanimidic acid, N-nitro-, (1Z)-", parseRules); - assertEquals("N-nitro-(1Z)-Ethanimidic acid", name); + assertEquals("(1Z)-N-nitro-Ethanimidic acid", name); } @Test @@ -156,6 +156,20 @@ String name = CASTools.uninvertCASName("L-Alanine, N-carboxy-, 1-ethyl ester", parseRules); assertEquals("1-ethyl N-carboxy-L-Alaninate", name); } + + @Test + public void cas23() throws ParsingException{ + String name = 
CASTools.uninvertCASName("Pyridine, 3-(tetrahydro-2H-pyran-2-yl)-, (S)-", parseRules); + assertEquals("(S)-3-(tetrahydro-2H-pyran-2-yl)-Pyridine", name); + } + + @Test + public void cas24() throws ParsingException{ + String name = CASTools.uninvertCASName("Pyrrolo[1,2-a]pyrimidinium, 1-[4-[(aminoiminomethyl)amino]butyl]-7-[[2-[(aminoiminomethyl)-amino]ethyl]thio]-6-(11-dodecenyl)-2,3,4,6,7,8-hexahydro-6-hydroxy-, chloride, dihydrochloride", parseRules); + assertEquals("1-[4-[(aminoiminomethyl)amino]butyl]-7-[[2-[(aminoiminomethyl)-amino]ethyl]thio]-6-(11-dodecenyl)-2,3,4,6,7,8-hexahydro-6-hydroxy-Pyrrolo[1,2-a]pyrimidinium chloride dihydrochloride", name); + } + + @Test(expected=ParsingException.class) public void notCas1() throws ParsingException{ diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_AmbiguitiesAndIrregularitiesTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_AmbiguitiesAndIrregularitiesTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_AmbiguitiesAndIrregularitiesTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_AmbiguitiesAndIrregularitiesTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,24 +1,21 @@ package uk.ac.cam.ch.wwmm.opsin; +import static org.junit.Assert.*; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; -import static junit.framework.Assert.*; import org.junit.Test; -import nu.xom.Attribute; -import nu.xom.Element; - public class ComponentGeneration_AmbiguitiesAndIrregularitiesTest { @Test public void testCorrectlyTokenisedAlkane(){ - Element substituent = new Element(SUBSTITUENT_EL); - Element alkaneComponent1 = new Element(ALKANESTEMCOMPONENT); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element alkaneComponent1 = new TokenEl(ALKANESTEMCOMPONENT); alkaneComponent1.addAttribute(new Attribute(VALUE_ATR, "4")); 
- Element alkaneComponent2 = new Element(ALKANESTEMCOMPONENT); + Element alkaneComponent2 = new TokenEl(ALKANESTEMCOMPONENT); alkaneComponent2.addAttribute(new Attribute(VALUE_ATR, "10")); - substituent.appendChild(alkaneComponent1); - substituent.appendChild(alkaneComponent2); + substituent.addChild(alkaneComponent1); + substituent.addChild(alkaneComponent2); try{ ComponentGenerator.resolveAmbiguities(substituent); } @@ -29,14 +26,14 @@ @Test public void testCorrectlyTokenisedAlkane2(){ - Element substituent = new Element(SUBSTITUENT_EL); - Element multiplier = new Element(MULTIPLIER_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element multiplier = new TokenEl(MULTIPLIER_EL); multiplier.addAttribute(new Attribute(TYPE_ATR, BASIC_TYPE_VAL)); multiplier.addAttribute(new Attribute(VALUE_ATR, "2")); - Element alkaneComponent = new Element(ALKANESTEMCOMPONENT); + Element alkaneComponent = new TokenEl(ALKANESTEMCOMPONENT); alkaneComponent.addAttribute(new Attribute(VALUE_ATR, "10")); - substituent.appendChild(multiplier); - substituent.appendChild(alkaneComponent); + substituent.addChild(multiplier); + substituent.addChild(alkaneComponent); try{ ComponentGenerator.resolveAmbiguities(substituent); } @@ -48,14 +45,14 @@ @Test public void testCorrectlyTokenisedAlkane3(){//unambiguously 6 hexanes - Element substituent = new Element(SUBSTITUENT_EL); - Element multiplier = new Element(MULTIPLIER_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element multiplier = new TokenEl(MULTIPLIER_EL); multiplier.addAttribute(new Attribute(TYPE_ATR, BASIC_TYPE_VAL)); multiplier.addAttribute(new Attribute(VALUE_ATR, "6")); - Element alkaneComponent = new Element(ALKANESTEMCOMPONENT); + Element alkaneComponent = new TokenEl(ALKANESTEMCOMPONENT); alkaneComponent.addAttribute(new Attribute(VALUE_ATR, "6")); - substituent.appendChild(multiplier); - substituent.appendChild(alkaneComponent); + substituent.addChild(multiplier); + 
substituent.addChild(alkaneComponent); try{ ComponentGenerator.resolveAmbiguities(substituent); } @@ -66,30 +63,29 @@ @Test(expected=ComponentGenerationException.class)//tetradec is 14 not 4 x 10 public void testMisTokenisedAlkane() throws ComponentGenerationException{ - Element substituent = new Element(SUBSTITUENT_EL); - Element erroneousMultiplier = new Element(MULTIPLIER_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element erroneousMultiplier = new TokenEl(MULTIPLIER_EL); erroneousMultiplier.addAttribute(new Attribute(TYPE_ATR, BASIC_TYPE_VAL)); erroneousMultiplier.addAttribute(new Attribute(VALUE_ATR, "4")); - Element alkaneComponent2 = new Element(ALKANESTEMCOMPONENT); + Element alkaneComponent2 = new TokenEl(ALKANESTEMCOMPONENT); alkaneComponent2.addAttribute(new Attribute(VALUE_ATR, "10")); - substituent.appendChild(erroneousMultiplier); - substituent.appendChild(alkaneComponent2); + substituent.addChild(erroneousMultiplier); + substituent.addChild(alkaneComponent2); ComponentGenerator.resolveAmbiguities(substituent); } @Test public void testLocantsIndicatingTokenizationIsCorrect(){//should be a group multiplier formally - Element substituent = new Element(SUBSTITUENT_EL); - Element locant = new Element(LOCANT_EL); - locant.appendChild("1,2,3,4"); - substituent.appendChild(locant); - Element multiplier = new Element(MULTIPLIER_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element locant = new TokenEl(LOCANT_EL, "1,2,3,4"); + substituent.addChild(locant); + Element multiplier = new TokenEl(MULTIPLIER_EL); multiplier.addAttribute(new Attribute(TYPE_ATR, BASIC_TYPE_VAL)); multiplier.addAttribute(new Attribute(VALUE_ATR, "4")); - Element alkaneComponent = new Element(ALKANESTEMCOMPONENT); + Element alkaneComponent = new TokenEl(ALKANESTEMCOMPONENT); alkaneComponent.addAttribute(new Attribute(VALUE_ATR, "10")); - substituent.appendChild(multiplier); - substituent.appendChild(alkaneComponent); + substituent.addChild(multiplier); 
+ substituent.addChild(alkaneComponent); try{ ComponentGenerator.resolveAmbiguities(substituent); } @@ -100,67 +96,60 @@ @Test(expected=ComponentGenerationException.class)//tetradec is 14 not 4 x 10 public void testLocantsIndicatingTokenizationIsIncorrect() throws ComponentGenerationException{ - Element substituent = new Element(SUBSTITUENT_EL); - Element locant = new Element(LOCANT_EL); - locant.appendChild("1"); - substituent.appendChild(locant); - Element erroneousMultiplier = new Element(MULTIPLIER_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element locant = new TokenEl(LOCANT_EL, "1"); + substituent.addChild(locant); + Element erroneousMultiplier = new TokenEl(MULTIPLIER_EL); erroneousMultiplier.addAttribute(new Attribute(TYPE_ATR, BASIC_TYPE_VAL)); erroneousMultiplier.addAttribute(new Attribute(VALUE_ATR, "4")); - Element alkaneComponent = new Element(ALKANESTEMCOMPONENT); + Element alkaneComponent = new TokenEl(ALKANESTEMCOMPONENT); alkaneComponent.addAttribute(new Attribute(VALUE_ATR, "10")); - substituent.appendChild(erroneousMultiplier); - substituent.appendChild(alkaneComponent); + substituent.addChild(erroneousMultiplier); + substituent.addChild(alkaneComponent); ComponentGenerator.resolveAmbiguities(substituent); } @Test(expected=ComponentGenerationException.class) public void testTetraphenShouldBeTetra_Phen1() throws ComponentGenerationException{//tetraphenyl - Element substituent = new Element(SUBSTITUENT_EL); - Element multiplier = new Element(MULTIPLIER_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element multiplier = new TokenEl(MULTIPLIER_EL); multiplier.addAttribute(new Attribute(TYPE_ATR, BASIC_TYPE_VAL)); multiplier.addAttribute(new Attribute(VALUE_ATR, "4")); - Element phen = new Element(HYDROCARBONFUSEDRINGSYSTEM_EL); - phen.appendChild("phen"); - Element yl = new Element(SUFFIX_EL); - yl.appendChild("yl"); - substituent.appendChild(multiplier); - substituent.appendChild(phen); - 
substituent.appendChild(yl); + Element phen = new TokenEl(HYDROCARBONFUSEDRINGSYSTEM_EL, "phen"); + Element yl = new TokenEl(SUFFIX_EL, "yl"); + substituent.addChild(multiplier); + substituent.addChild(phen); + substituent.addChild(yl); ComponentGenerator.resolveAmbiguities(substituent); } @Test(expected=ComponentGenerationException.class) public void testTetraphenShouldBeTetra_Phen2() throws ComponentGenerationException{//tetraphenoxy - Element substituent = new Element(SUBSTITUENT_EL); - Element multiplier = new Element(MULTIPLIER_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element multiplier = new TokenEl(MULTIPLIER_EL); multiplier.addAttribute(new Attribute(TYPE_ATR, BASIC_TYPE_VAL)); multiplier.addAttribute(new Attribute(VALUE_ATR, "4")); - Element phen = new Element(HYDROCARBONFUSEDRINGSYSTEM_EL); - phen.appendChild("phen"); - Element yl = new Element(SUFFIX_EL); - yl.appendChild("oxy"); - substituent.appendChild(multiplier); - substituent.appendChild(phen); - substituent.appendChild(yl); + Element phen = new TokenEl(HYDROCARBONFUSEDRINGSYSTEM_EL, "phen"); + Element yl = new TokenEl(SUFFIX_EL, "oxy"); + substituent.addChild(multiplier); + substituent.addChild(phen); + substituent.addChild(yl); ComponentGenerator.resolveAmbiguities(substituent); } @Test public void testTetraphenShouldBeTetraphen1(){//tetrapheneyl - Element substituent = new Element(SUBSTITUENT_EL); - Element multiplier = new Element(MULTIPLIER_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element multiplier = new TokenEl(MULTIPLIER_EL); multiplier.addAttribute(new Attribute(TYPE_ATR, BASIC_TYPE_VAL)); multiplier.addAttribute(new Attribute(VALUE_ATR, "4")); - Element phen = new Element(HYDROCARBONFUSEDRINGSYSTEM_EL); + Element phen = new TokenEl(HYDROCARBONFUSEDRINGSYSTEM_EL, "phen"); phen.addAttribute(new Attribute(SUBSEQUENTUNSEMANTICTOKEN_ATR, "e")); - phen.appendChild("phen"); - Element yl = new Element(SUFFIX_EL); - yl.appendChild("yl"); - 
substituent.appendChild(multiplier); - substituent.appendChild(phen); - substituent.appendChild(yl); + Element yl = new TokenEl(SUFFIX_EL, "yl"); + substituent.addChild(multiplier); + substituent.addChild(phen); + substituent.addChild(yl); try{ ComponentGenerator.resolveAmbiguities(substituent); } @@ -171,20 +160,17 @@ @Test public void testTetraphenShouldBeTetraphen2(){//tetraphen2yl - Element substituent = new Element(SUBSTITUENT_EL); - Element multiplier = new Element(MULTIPLIER_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element multiplier = new TokenEl(MULTIPLIER_EL); multiplier.addAttribute(new Attribute(TYPE_ATR, BASIC_TYPE_VAL)); multiplier.addAttribute(new Attribute(VALUE_ATR, "4")); - Element phen = new Element(HYDROCARBONFUSEDRINGSYSTEM_EL); - phen.appendChild("phen"); - Element locant = new Element(LOCANT_EL); - locant.appendChild("2"); - Element yl = new Element(SUFFIX_EL); - yl.appendChild("yl"); - substituent.appendChild(multiplier); - substituent.appendChild(phen); - substituent.appendChild(locant); - substituent.appendChild(yl); + Element phen = new TokenEl(HYDROCARBONFUSEDRINGSYSTEM_EL, "phen"); + Element locant = new TokenEl(LOCANT_EL, "2"); + Element yl = new TokenEl(SUFFIX_EL, "yl"); + substituent.addChild(multiplier); + substituent.addChild(phen); + substituent.addChild(locant); + substituent.addChild(yl); try{ ComponentGenerator.resolveAmbiguities(substituent); } @@ -195,20 +181,17 @@ @Test public void testTetraphenShouldBeTetraphen3(){//2tetraphenyl - Element substituent = new Element(SUBSTITUENT_EL); - Element locant = new Element(LOCANT_EL); - locant.appendChild("2"); - Element multiplier = new Element(MULTIPLIER_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element locant = new TokenEl(LOCANT_EL, "2"); + Element multiplier = new TokenEl(MULTIPLIER_EL); multiplier.addAttribute(new Attribute(TYPE_ATR, BASIC_TYPE_VAL)); multiplier.addAttribute(new Attribute(VALUE_ATR, "4")); - Element phen = new 
Element(HYDROCARBONFUSEDRINGSYSTEM_EL); - phen.appendChild("phen"); - Element yl = new Element(SUFFIX_EL); - yl.appendChild("yl"); - substituent.appendChild(locant); - substituent.appendChild(multiplier); - substituent.appendChild(phen); - substituent.appendChild(yl); + Element phen = new TokenEl(HYDROCARBONFUSEDRINGSYSTEM_EL, "phen"); + Element yl = new TokenEl(SUFFIX_EL, "yl"); + substituent.addChild(locant); + substituent.addChild(multiplier); + substituent.addChild(phen); + substituent.addChild(yl); try{ ComponentGenerator.resolveAmbiguities(substituent); } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_MiscTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_MiscTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_MiscTest.java 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_MiscTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,57 @@ +package uk.ac.cam.ch.wwmm.opsin; + +import static org.junit.Assert.assertEquals; +import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; + +import org.junit.Test; + +public class ComponentGeneration_MiscTest { + + @Test(expected=ComponentGenerationException.class) + public void testRejectSingleComponentSaltComponent() throws ComponentGenerationException { + //reject "hydrate" + Element molecule = new GroupingEl(MOLECULE_EL); + Element wordRule = new GroupingEl(WORDRULE_EL); + Element word = new GroupingEl(WORD_EL); + Element root = new GroupingEl(ROOT_EL); + Element group = new TokenEl(GROUP_EL); + group.addAttribute(new Attribute(TYPE_ATR, SIMPLEGROUP_TYPE_VAL)); + group.addAttribute(new Attribute(SUBTYPE_ATR, SALTCOMPONENT_SUBTYPE_VAL)); + root.addChild(group); + word.addChild(root); + wordRule.addChild(word); + molecule.addChild(wordRule); + processComponents(molecule); + } + + @Test + public void 
testNumericallyMultipliedSaltComponent() throws ComponentGenerationException { + Element molecule = new GroupingEl(MOLECULE_EL); + molecule.addChild(new GroupingEl(WORDRULE_EL)); + + Element wordRule = new GroupingEl(WORDRULE_EL); + Element word = new GroupingEl(WORD_EL); + Element root = new GroupingEl(ROOT_EL); + Element group = new TokenEl(GROUP_EL); + group.addAttribute(new Attribute(TYPE_ATR, SIMPLEGROUP_TYPE_VAL)); + group.addAttribute(new Attribute(SUBTYPE_ATR, SALTCOMPONENT_SUBTYPE_VAL)); + group.setValue("2hcl"); + root.addChild(group); + word.addChild(root); + wordRule.addChild(word); + molecule.addChild(wordRule); + processComponents(molecule); + assertEquals(2, root.getChildCount()); + Element multiplier = root.getChild(0); + assertEquals(MULTIPLIER_EL, multiplier.getName()); + assertEquals("2", multiplier.getAttributeValue(VALUE_ATR)); + assertEquals("2", multiplier.getValue()); + Element updatedGroup = root.getChild(1); + assertEquals("hcl", updatedGroup.getValue()); + } + + private void processComponents(Element parse) throws ComponentGenerationException { + new ComponentGenerator(new NameToStructureConfig()).processParse(parse); + } + +} diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_ProcesslocantsTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_ProcesslocantsTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_ProcesslocantsTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_ProcesslocantsTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,11 +1,8 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.*; +import static org.junit.Assert.*; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; -import nu.xom.Attribute; -import nu.xom.Element; - import org.junit.Before; import org.junit.Test; @@ -16,22 +13,22 @@ @Before public 
void setUpSubstituent(){ - substituent = new Element(SUBSTITUENT_EL); - locant = new Element(LOCANT_EL); - substituent.appendChild(locant); - substituent.appendChild(new Element(GROUP_EL));//a dummy element to give the locant a potential purpose + substituent = new GroupingEl(SUBSTITUENT_EL); + locant = new TokenEl(LOCANT_EL); + substituent.addChild(locant); + substituent.addChild(new TokenEl(GROUP_EL));//a dummy element to give the locant a potential purpose } @Test public void testCardinalNumber() throws ComponentGenerationException { - locant.appendChild("1"); + locant.setValue("1"); ComponentGenerator.processLocants(substituent); assertEquals("1", locant.getValue()); } @Test public void testCardinalNumberWithHyphen() throws ComponentGenerationException { - locant.appendChild("1-"); + locant.setValue("1-"); ComponentGenerator.processLocants(substituent); assertEquals("1", locant.getValue()); } @@ -39,232 +36,275 @@ @Test public void testElementSymbol() throws ComponentGenerationException { - locant.appendChild("N-"); + locant.setValue("N-"); ComponentGenerator.processLocants(substituent); assertEquals("N", locant.getValue()); } @Test public void testAminoAcidStyleLocant() throws ComponentGenerationException { - locant.appendChild("N1-"); + locant.setValue("N1-"); ComponentGenerator.processLocants(substituent); assertEquals("N1", locant.getValue()); } @Test public void testCompoundLocant() throws ComponentGenerationException { - locant.appendChild("1(10)-"); + locant.setValue("1(10)-"); ComponentGenerator.processLocants(substituent); assertEquals("1(10)", locant.getValue()); } @Test public void testGreek() throws ComponentGenerationException { - locant.appendChild("alpha"); + locant.setValue("alpha"); ComponentGenerator.processLocants(substituent); assertEquals("alpha", locant.getValue()); } @Test public void testNotlowercase1() throws ComponentGenerationException { - locant.appendChild("AlPhA-"); + locant.setValue("AlPhA-"); 
ComponentGenerator.processLocants(substituent); assertEquals("alpha", locant.getValue()); } @Test public void testNotlowercase2() throws ComponentGenerationException { - locant.appendChild("NAlPhA-"); + locant.setValue("NAlPhA-"); ComponentGenerator.processLocants(substituent); assertEquals("Nalpha", locant.getValue()); } @Test public void testIUPAC2004() throws ComponentGenerationException { - locant.appendChild("2-N-"); + locant.setValue("2-N-"); ComponentGenerator.processLocants(substituent); assertEquals("N2", locant.getValue()); } @Test public void testSuperscript1() throws ComponentGenerationException { - locant.appendChild("N^(2)"); + locant.setValue("N^(2)"); ComponentGenerator.processLocants(substituent); assertEquals("N2", locant.getValue()); } @Test public void testSuperscript2() throws ComponentGenerationException { - locant.appendChild("N^2"); + locant.setValue("N^2"); ComponentGenerator.processLocants(substituent); assertEquals("N2", locant.getValue()); } @Test public void testSuperscript3() throws ComponentGenerationException { - locant.appendChild("N(2)"); + locant.setValue("N(2)"); ComponentGenerator.processLocants(substituent); assertEquals("N2", locant.getValue()); } @Test public void testSuperscript4() throws ComponentGenerationException { - locant.appendChild("N~12~"); + locant.setValue("N~12~"); ComponentGenerator.processLocants(substituent); assertEquals("N12", locant.getValue()); } @Test public void testSuperscript5() throws ComponentGenerationException { - locant.appendChild("N(alpha)"); + locant.setValue("N(alpha)"); ComponentGenerator.processLocants(substituent); assertEquals("Nalpha", locant.getValue()); } @Test public void testSuperscript6() throws ComponentGenerationException { - locant.appendChild("N^alpha"); + locant.setValue("N^alpha"); ComponentGenerator.processLocants(substituent); assertEquals("Nalpha", locant.getValue()); } @Test + public void testSuperscript7() throws ComponentGenerationException { + locant.setValue("N*12*"); + 
ComponentGenerator.processLocants(substituent); + assertEquals("N12", locant.getValue()); + } + + @Test public void testAddedHydrogen() throws ComponentGenerationException { - locant.appendChild("3(5'H)"); + locant.setValue("3(5'H)"); ComponentGenerator.processLocants(substituent); assertEquals("3", locant.getValue()); assertEquals(ADDEDHYDROGENLOCANT_TYPE_VAL, locant.getAttributeValue(TYPE_ATR)); - Element addedHydrogen = (Element) XOMTools.getPreviousSibling(locant); + Element addedHydrogen = OpsinTools.getPreviousSibling(locant); assertNotNull(addedHydrogen); - assertEquals(ADDEDHYDROGEN_EL, addedHydrogen.getLocalName()); + assertEquals(ADDEDHYDROGEN_EL, addedHydrogen.getName()); assertEquals("5'", addedHydrogen.getAttributeValue(LOCANT_ATR)); } @Test public void testAddedHydrogen2() throws ComponentGenerationException { - locant.appendChild("1,2(2H,7H)"); + locant.setValue("1,2(2H,7H)"); ComponentGenerator.processLocants(substituent); assertEquals("1,2", locant.getValue()); assertEquals(ADDEDHYDROGENLOCANT_TYPE_VAL, locant.getAttributeValue(TYPE_ATR)); - Element addedHydrogen1 = (Element) XOMTools.getPreviousSibling(locant); + Element addedHydrogen1 = OpsinTools.getPreviousSibling(locant); assertNotNull(addedHydrogen1); - assertEquals(ADDEDHYDROGEN_EL, addedHydrogen1.getLocalName()); + assertEquals(ADDEDHYDROGEN_EL, addedHydrogen1.getName()); assertEquals("7", addedHydrogen1.getAttributeValue(LOCANT_ATR)); - Element addedHydrogen2 = (Element) XOMTools.getPreviousSibling(addedHydrogen1); + Element addedHydrogen2 = OpsinTools.getPreviousSibling(addedHydrogen1); assertNotNull(addedHydrogen2); - assertEquals(ADDEDHYDROGEN_EL, addedHydrogen2.getLocalName()); + assertEquals(ADDEDHYDROGEN_EL, addedHydrogen2.getName()); assertEquals("2", addedHydrogen2.getAttributeValue(LOCANT_ATR)); } @Test public void testStereochemistryInLocant1() throws ComponentGenerationException { - locant.appendChild("5(R)"); + locant.setValue("5(R)"); 
ComponentGenerator.processLocants(substituent); assertEquals("5", locant.getValue()); - Element stereochemistry = (Element) XOMTools.getPreviousSibling(locant); + Element stereochemistry = OpsinTools.getPreviousSibling(locant); assertNotNull(stereochemistry); - assertEquals(STEREOCHEMISTRY_EL, stereochemistry.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, stereochemistry.getName()); assertEquals(STEREOCHEMISTRYBRACKET_TYPE_VAL, stereochemistry.getAttributeValue(TYPE_ATR)); assertEquals("(5R)", stereochemistry.getValue());//will be handled by process stereochemistry function } @Test public void testStereochemistryInLocant2() throws ComponentGenerationException { - locant.appendChild("5-(S)"); + locant.setValue("5-(S)"); ComponentGenerator.processLocants(substituent); assertEquals("5", locant.getValue()); - Element stereochemistry = (Element) XOMTools.getPreviousSibling(locant); + Element stereochemistry = OpsinTools.getPreviousSibling(locant); assertNotNull(stereochemistry); - assertEquals(STEREOCHEMISTRY_EL, stereochemistry.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, stereochemistry.getName()); assertEquals(STEREOCHEMISTRYBRACKET_TYPE_VAL, stereochemistry.getAttributeValue(TYPE_ATR)); assertEquals("(5S)", stereochemistry.getValue());//will be handled by process stereochemistry function } @Test public void testStereochemistryInLocant3() throws ComponentGenerationException { - locant.appendChild("N(3)-(S)"); + locant.setValue("N(3)-(S)"); ComponentGenerator.processLocants(substituent); assertEquals("N3", locant.getValue()); - Element stereochemistry = (Element) XOMTools.getPreviousSibling(locant); + Element stereochemistry = OpsinTools.getPreviousSibling(locant); assertNotNull(stereochemistry); - assertEquals(STEREOCHEMISTRY_EL, stereochemistry.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, stereochemistry.getName()); assertEquals(STEREOCHEMISTRYBRACKET_TYPE_VAL, stereochemistry.getAttributeValue(TYPE_ATR)); assertEquals("(N3S)", 
stereochemistry.getValue());//will be handled by process stereochemistry function } @Test + public void testStereochemistryInLocant4() throws ComponentGenerationException { + locant.setValue("5(RS)"); + ComponentGenerator.processLocants(substituent); + assertEquals("5", locant.getValue()); + Element stereochemistry = OpsinTools.getPreviousSibling(locant); + assertNotNull(stereochemistry); + assertEquals(STEREOCHEMISTRY_EL, stereochemistry.getName()); + assertEquals(STEREOCHEMISTRYBRACKET_TYPE_VAL, stereochemistry.getAttributeValue(TYPE_ATR)); + assertEquals("(5RS)", stereochemistry.getValue());//will be handled by process stereochemistry function + } + + @Test + public void testStereochemistryInLocant5() throws ComponentGenerationException { + locant.setValue("5(R,S)"); + ComponentGenerator.processLocants(substituent); + assertEquals("5", locant.getValue()); + Element stereochemistry = OpsinTools.getPreviousSibling(locant); + assertNotNull(stereochemistry); + assertEquals(STEREOCHEMISTRY_EL, stereochemistry.getName()); + assertEquals(STEREOCHEMISTRYBRACKET_TYPE_VAL, stereochemistry.getAttributeValue(TYPE_ATR)); + assertEquals("(5RS)", stereochemistry.getValue());//will be handled by process stereochemistry function + } + + @Test + public void testStereochemistryInLocant6() throws ComponentGenerationException { + locant.setValue("5(R/S)"); + ComponentGenerator.processLocants(substituent); + assertEquals("5", locant.getValue()); + Element stereochemistry = OpsinTools.getPreviousSibling(locant); + assertNotNull(stereochemistry); + assertEquals(STEREOCHEMISTRY_EL, stereochemistry.getName()); + assertEquals(STEREOCHEMISTRYBRACKET_TYPE_VAL, stereochemistry.getAttributeValue(TYPE_ATR)); + assertEquals("(5RS)", stereochemistry.getValue());//will be handled by process stereochemistry function + } + + @Test public void testMultipleCardinals() throws ComponentGenerationException { - locant.appendChild("2,3-"); + locant.setValue("2,3-"); 
ComponentGenerator.processLocants(substituent); assertEquals("2,3", locant.getValue()); } @Test public void testMultipleTypesTogether() throws ComponentGenerationException { - locant.appendChild("2,N5,GaMMa,3-N,N^3,N(2),N~10~,4(5H),3-N(S),1(6)-"); + locant.setValue("2,N5,GaMMa,3-N,N^3,N(2),N~10~,4(5H),3-N(S),1(6)-"); ComponentGenerator.processLocants(substituent); assertEquals("2,N5,gamma,N3,N3,N2,N10,4,N3,1(6)", locant.getValue()); assertEquals(ADDEDHYDROGENLOCANT_TYPE_VAL, locant.getAttributeValue(TYPE_ATR)); - Element stereochemistry = (Element) XOMTools.getPreviousSibling(locant); + Element stereochemistry = OpsinTools.getPreviousSibling(locant); assertNotNull(stereochemistry); - assertEquals(STEREOCHEMISTRY_EL, stereochemistry.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, stereochemistry.getName()); assertEquals(STEREOCHEMISTRYBRACKET_TYPE_VAL, stereochemistry.getAttributeValue(TYPE_ATR)); assertEquals("(N3S)", stereochemistry.getValue()); - Element addedHydrogen = (Element) XOMTools.getPreviousSibling(stereochemistry); + Element addedHydrogen = OpsinTools.getPreviousSibling(stereochemistry); assertNotNull(addedHydrogen); - assertEquals(ADDEDHYDROGEN_EL, addedHydrogen.getLocalName()); + assertEquals(ADDEDHYDROGEN_EL, addedHydrogen.getName()); assertEquals("5", addedHydrogen.getAttributeValue(LOCANT_ATR)); } @Test public void testCarbohydrateStyleLocants() throws ComponentGenerationException { //2,4,6-tri-O - locant.appendChild("O"); - Element multiplier = new Element(MULTIPLIER_EL); + locant.setValue("O"); + Element multiplier = new TokenEl(MULTIPLIER_EL); multiplier.addAttribute(new Attribute(VALUE_ATR, "3")); - XOMTools.insertBefore(locant, multiplier); - Element numericLocant = new Element(LOCANT_EL); - numericLocant.appendChild("2,4,6"); - XOMTools.insertBefore(multiplier, numericLocant); + OpsinTools.insertBefore(locant, multiplier); + Element numericLocant = new TokenEl(LOCANT_EL); + numericLocant.setValue("2,4,6"); + 
OpsinTools.insertBefore(multiplier, numericLocant); ComponentGenerator.processLocants(substituent); assertEquals("O2,O4,O6", numericLocant.getValue()); - Element group = (Element) XOMTools.getNextSibling(multiplier); + Element group = OpsinTools.getNextSibling(multiplier); assertNotNull(group); - assertEquals(group.getLocalName(), GROUP_EL); + assertEquals(group.getName(), GROUP_EL); } @Test public void testCarbohydrateStyleLocantsNoNumericComponent() throws ComponentGenerationException { //tri-O - locant.appendChild("O"); - Element multiplier = new Element(MULTIPLIER_EL); + locant.setValue("O"); + Element multiplier = new TokenEl(MULTIPLIER_EL); multiplier.addAttribute(new Attribute(VALUE_ATR, "3")); - XOMTools.insertBefore(locant, multiplier); + OpsinTools.insertBefore(locant, multiplier); ComponentGenerator.processLocants(substituent); - Element elBeforeMultiplier = (Element) XOMTools.getPreviousSibling(multiplier); + Element elBeforeMultiplier = OpsinTools.getPreviousSibling(multiplier); assertNotNull("A locant should not be in front of the multiplier", elBeforeMultiplier); - assertEquals(LOCANT_EL, elBeforeMultiplier.getLocalName()); + assertEquals(LOCANT_EL, elBeforeMultiplier.getName()); assertEquals("O,O',O''", elBeforeMultiplier.getValue()); - Element group = (Element) XOMTools.getNextSibling(multiplier); + Element group = OpsinTools.getNextSibling(multiplier); assertNotNull(group); - assertEquals(group.getLocalName(), GROUP_EL); + assertEquals(group.getName(), GROUP_EL); } @Test public void testCarbohydrateStyleLocantsCounterExample() throws ComponentGenerationException { //2,4,6-tri-2 (this is not a carbohydrate style locant) - locant.appendChild("2"); - Element multiplier = new Element(MULTIPLIER_EL); + locant.setValue("2"); + Element multiplier = new TokenEl(MULTIPLIER_EL); multiplier.addAttribute(new Attribute(VALUE_ATR, "3")); - XOMTools.insertBefore(locant, multiplier); - Element numericLocant = new Element(LOCANT_EL); - 
numericLocant.appendChild("2,4,6"); - XOMTools.insertBefore(multiplier, numericLocant); + OpsinTools.insertBefore(locant, multiplier); + Element numericLocant = new TokenEl(LOCANT_EL); + numericLocant.setValue("2,4,6"); + OpsinTools.insertBefore(multiplier, numericLocant); ComponentGenerator.processLocants(substituent); assertEquals("2,4,6", numericLocant.getValue()); - Element unmodifiedLocant = (Element) XOMTools.getNextSibling(multiplier); + Element unmodifiedLocant = OpsinTools.getNextSibling(multiplier); assertNotNull(unmodifiedLocant); - assertEquals(unmodifiedLocant.getLocalName(), LOCANT_EL); + assertEquals(unmodifiedLocant.getName(), LOCANT_EL); assertEquals("2", unmodifiedLocant.getValue()); } } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_StereochemistryTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_StereochemistryTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_StereochemistryTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentGeneration_StereochemistryTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,11 +1,9 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.assertEquals; +import static org.junit.Assert.*; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; -import nu.xom.Attribute; -import nu.xom.Element; -import nu.xom.Elements; +import java.util.List; import org.junit.Test; @@ -13,41 +11,39 @@ @Test public void testUnlocantedS() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(S)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - 
stereochem.appendChild("(S)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(1, children.size()); Element newStereochemistryEl = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals(null, newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("S", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); } - + @Test public void testMultipleUnLocanted() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(R,R)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(R,R)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(2, children.size()); Element newStereochemistryEl1 = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getName()); assertEquals(null, newStereochemistryEl1.getAttributeValue(LOCANT_ATR)); assertEquals("R", newStereochemistryEl1.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl1.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl2 = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getLocalName()); 
+ assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getName()); assertEquals(null, newStereochemistryEl2.getAttributeValue(LOCANT_ATR)); assertEquals("R", newStereochemistryEl2.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl2.getAttributeValue(TYPE_ATR)); @@ -55,17 +51,16 @@ @Test public void testLocantedR() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(1R)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(1R)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(1, children.size()); Element newStereochemistryEl = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("1", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("R", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); @@ -73,29 +68,28 @@ @Test public void testMultipleRorSLocanted() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(alphaR,3S,7'S)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(alphaR,3S,7'S)"); - 
ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(3, children.size()); Element newStereochemistryEl1 = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getName()); assertEquals("alpha", newStereochemistryEl1.getAttributeValue(LOCANT_ATR)); assertEquals("R", newStereochemistryEl1.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl1.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl2 = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getName()); assertEquals("3", newStereochemistryEl2.getAttributeValue(LOCANT_ATR)); assertEquals("S", newStereochemistryEl2.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl2.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl3 = children.get(2); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl3.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl3.getName()); assertEquals("7'", newStereochemistryEl3.getAttributeValue(LOCANT_ATR)); assertEquals("S", newStereochemistryEl3.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl3.getAttributeValue(TYPE_ATR)); @@ -104,17 +98,16 @@ @Test public void testUnLocantedE() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(E)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - 
stereochem.appendChild("(E)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(1, children.size()); Element newStereochemistryEl = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals(null, newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("E", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(E_OR_Z_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); @@ -122,17 +115,16 @@ @Test public void testLocantedZ() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(5Z)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(5Z)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(1, children.size()); Element newStereochemistryEl = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("5", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("Z", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(E_OR_Z_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); @@ -140,29 +132,28 @@ @Test public void testMultipleRorSorEorZ() throws ComponentGenerationException { - Element substituent 
= new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(NZ,2E,R)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(NZ,2E,R)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(3, children.size()); Element newStereochemistryEl1 = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getName()); assertEquals("N", newStereochemistryEl1.getAttributeValue(LOCANT_ATR)); assertEquals("Z", newStereochemistryEl1.getAttributeValue(VALUE_ATR)); assertEquals(E_OR_Z_TYPE_VAL, newStereochemistryEl1.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl2 = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getName()); assertEquals("2", newStereochemistryEl2.getAttributeValue(LOCANT_ATR)); assertEquals("E", newStereochemistryEl2.getAttributeValue(VALUE_ATR)); assertEquals(E_OR_Z_TYPE_VAL, newStereochemistryEl2.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl3 = children.get(2); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl3.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl3.getName()); assertEquals(null, newStereochemistryEl3.getAttributeValue(LOCANT_ATR)); assertEquals("R", newStereochemistryEl3.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl3.getAttributeValue(TYPE_ATR)); @@ -170,29 +161,28 @@ @Test public void testDashInsteadOfComma() throws ComponentGenerationException { 
- Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(NZ,2E-R)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(NZ,2E-R)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(3, children.size()); Element newStereochemistryEl1 = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getName()); assertEquals("N", newStereochemistryEl1.getAttributeValue(LOCANT_ATR)); assertEquals("Z", newStereochemistryEl1.getAttributeValue(VALUE_ATR)); assertEquals(E_OR_Z_TYPE_VAL, newStereochemistryEl1.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl2 = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getName()); assertEquals("2", newStereochemistryEl2.getAttributeValue(LOCANT_ATR)); assertEquals("E", newStereochemistryEl2.getAttributeValue(VALUE_ATR)); assertEquals(E_OR_Z_TYPE_VAL, newStereochemistryEl2.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl3 = children.get(2); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl3.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl3.getName()); assertEquals(null, newStereochemistryEl3.getAttributeValue(LOCANT_ATR)); assertEquals("R", newStereochemistryEl3.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl3.getAttributeValue(TYPE_ATR)); @@ -200,23 +190,22 @@ @Test public void testBracketedLocantedCisTrans() throws 
ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(3cis,5trans)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(3cis,5trans)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(2, children.size()); Element newStereochemistryEl1 = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getName()); assertEquals("3", newStereochemistryEl1.getAttributeValue(LOCANT_ATR)); assertEquals("cis", newStereochemistryEl1.getAttributeValue(VALUE_ATR)); assertEquals(CISORTRANS_TYPE_VAL, newStereochemistryEl1.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl2 = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getName()); assertEquals("5", newStereochemistryEl2.getAttributeValue(LOCANT_ATR)); assertEquals("trans", newStereochemistryEl2.getAttributeValue(VALUE_ATR)); assertEquals(CISORTRANS_TYPE_VAL, newStereochemistryEl2.getAttributeValue(TYPE_ATR)); @@ -224,108 +213,248 @@ @Test public void testBracketedUnlocantedCisTrans() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(5S-trans)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - 
substituent.appendChild(stereochem); - stereochem.appendChild("(5S-trans)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(2, children.size()); Element newStereochemistryEl1 = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getName()); assertEquals("5", newStereochemistryEl1.getAttributeValue(LOCANT_ATR)); assertEquals("S", newStereochemistryEl1.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl1.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl2 = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getName()); assertEquals(null, newStereochemistryEl2.getAttributeValue(LOCANT_ATR)); assertEquals("trans", newStereochemistryEl2.getAttributeValue(VALUE_ATR)); assertEquals(CISORTRANS_TYPE_VAL, newStereochemistryEl2.getAttributeValue(TYPE_ATR)); } @Test + public void testBracketedExo() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(exo)"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(1, children.size()); + Element newStereochemistryEl = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); + assertEquals(null, newStereochemistryEl.getAttributeValue(LOCANT_ATR)); + assertEquals("exo", newStereochemistryEl.getAttributeValue(VALUE_ATR)); + assertEquals(ENDO_EXO_SYN_ANTI_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); 
+ } + + @Test + public void testBracketedEndo() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(3-endo,5S)"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(2, children.size()); + Element newStereochemistryEl1 = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getName()); + assertEquals("3", newStereochemistryEl1.getAttributeValue(LOCANT_ATR)); + assertEquals("endo", newStereochemistryEl1.getAttributeValue(VALUE_ATR)); + assertEquals(ENDO_EXO_SYN_ANTI_TYPE_VAL, newStereochemistryEl1.getAttributeValue(TYPE_ATR)); + + Element newStereochemistryEl2 = children.get(1); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getName()); + assertEquals("5", newStereochemistryEl2.getAttributeValue(LOCANT_ATR)); + assertEquals("S", newStereochemistryEl2.getAttributeValue(VALUE_ATR)); + assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl2.getAttributeValue(TYPE_ATR)); + } + + @Test public void testLocantedCisTrans() throws ComponentGenerationException { //XML for 3-cis,5-trans: - Element substituent = new Element(SUBSTITUENT_EL); - Element locant = new Element(LOCANT_ATR); - locant.appendChild("3"); - substituent.appendChild(locant); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element locant = new TokenEl(LOCANT_EL, "3"); + substituent.addChild(locant); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "cis"); stereochem.addAttribute(new Attribute(TYPE_ATR, CISORTRANS_TYPE_VAL)); stereochem.addAttribute(new Attribute(VALUE_ATR, "cis")); - stereochem.appendChild("cis"); - substituent.appendChild(stereochem); - locant = new Element(LOCANT_ATR); - locant.appendChild("5"); - 
substituent.appendChild(locant); - stereochem = new Element(STEREOCHEMISTRY_EL); + substituent.addChild(stereochem); + locant = new TokenEl(LOCANT_EL, "5"); + substituent.addChild(locant); + stereochem = new TokenEl(STEREOCHEMISTRY_EL, "trans"); stereochem.addAttribute(new Attribute(TYPE_ATR, CISORTRANS_TYPE_VAL)); stereochem.addAttribute(new Attribute(VALUE_ATR, "trans")); - stereochem.appendChild("trans"); - substituent.appendChild(stereochem); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(2, children.size()); Element modifiedStereochemistryEl1 = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getName()); assertEquals("3", modifiedStereochemistryEl1.getAttributeValue(LOCANT_ATR)); assertEquals("cis", modifiedStereochemistryEl1.getAttributeValue(VALUE_ATR)); assertEquals(CISORTRANS_TYPE_VAL, modifiedStereochemistryEl1.getAttributeValue(TYPE_ATR)); Element modifiedStereochemistryEl2 = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl2.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl2.getName()); assertEquals("5", modifiedStereochemistryEl2.getAttributeValue(LOCANT_ATR)); assertEquals("trans", modifiedStereochemistryEl2.getAttributeValue(VALUE_ATR)); assertEquals(CISORTRANS_TYPE_VAL, modifiedStereochemistryEl2.getAttributeValue(TYPE_ATR)); } @Test + public void testLocantedExoOn() throws ComponentGenerationException { + //XML for 3-exobicyclo[2.2.2]oct: + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element locant = new TokenEl(LOCANT_EL, "3"); + substituent.addChild(locant); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "exo"); + stereochem.addAttribute(new Attribute(TYPE_ATR, 
ENDO_EXO_SYN_ANTI_TYPE_VAL)); + stereochem.addAttribute(new Attribute(VALUE_ATR, "exo")); + substituent.addChild(stereochem); + Element multiplier = new TokenEl(MULTIPLIER_EL); + multiplier.addAttribute(new Attribute(TYPE_ATR, VONBAEYER_TYPE_VAL)); + substituent.addChild(multiplier); + Element vonBaeyer = new TokenEl(VONBAEYER_EL); + substituent.addChild(vonBaeyer); + Element group = new TokenEl(GROUP_EL); + group.addAttribute(new Attribute(TYPE_ATR, CHAIN_TYPE_VAL)); + group.addAttribute(new Attribute(SUBTYPE_ATR, ALKANESTEM_SUBTYPE_VAL)); + substituent.addChild(group); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(4, children.size()); + Element modifiedStereochemistryEl = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl.getName()); + assertEquals("3", modifiedStereochemistryEl.getAttributeValue(LOCANT_ATR)); + assertEquals("exo", modifiedStereochemistryEl.getAttributeValue(VALUE_ATR)); + assertEquals(ENDO_EXO_SYN_ANTI_TYPE_VAL, modifiedStereochemistryEl.getAttributeValue(TYPE_ATR)); + } + + @Test + public void testLocantedExo() throws ComponentGenerationException { + //XML for 3-exoamino + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element locant = new TokenEl(LOCANT_EL, "3"); + substituent.addChild(locant); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "exo"); + stereochem.addAttribute(new Attribute(TYPE_ATR, ENDO_EXO_SYN_ANTI_TYPE_VAL)); + stereochem.addAttribute(new Attribute(VALUE_ATR, "exo")); + substituent.addChild(stereochem); + Element group = new TokenEl(GROUP_EL); + group.addAttribute(new Attribute(TYPE_ATR, SUBSTITUENT_EL)); + group.addAttribute(new Attribute(SUBTYPE_ATR, SIMPLESUBSTITUENT_SUBTYPE_VAL)); + substituent.addChild(group); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(3, children.size()); + assertEquals(LOCANT_EL, children.get(0).getName()); + Element 
modifiedStereochemistryEl = children.get(1); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl.getName()); + assertEquals("3", modifiedStereochemistryEl.getAttributeValue(LOCANT_ATR)); + assertEquals("exo", modifiedStereochemistryEl.getAttributeValue(VALUE_ATR)); + assertEquals(ENDO_EXO_SYN_ANTI_TYPE_VAL, modifiedStereochemistryEl.getAttributeValue(TYPE_ATR)); + } + + @Test + public void testAnti() throws ComponentGenerationException { + //XML for anti: + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "anti"); + stereochem.addAttribute(new Attribute(TYPE_ATR, ENDO_EXO_SYN_ANTI_TYPE_VAL)); + stereochem.addAttribute(new Attribute(VALUE_ATR, "anti")); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(1, children.size()); + Element unmodifiedStereochemistryEl = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, unmodifiedStereochemistryEl.getName()); + assertEquals("anti", unmodifiedStereochemistryEl.getAttributeValue(VALUE_ATR)); + assertEquals(ENDO_EXO_SYN_ANTI_TYPE_VAL, unmodifiedStereochemistryEl.getAttributeValue(TYPE_ATR)); + } + + @Test public void testCis() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "cis"); stereochem.addAttribute(new Attribute(TYPE_ATR, CISORTRANS_TYPE_VAL)); stereochem.addAttribute(new Attribute(VALUE_ATR, "cis")); - stereochem.appendChild("cis"); - substituent.appendChild(stereochem); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(1, children.size()); Element 
modifiedStereochemistryEl1 = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getName()); assertEquals(null, modifiedStereochemistryEl1.getAttributeValue(LOCANT_ATR)); assertEquals("cis", modifiedStereochemistryEl1.getAttributeValue(VALUE_ATR)); assertEquals(CISORTRANS_TYPE_VAL, modifiedStereochemistryEl1.getAttributeValue(TYPE_ATR)); } @Test + public void testAxial1() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(M)"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(1, children.size()); + Element newStereochemistryEl = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); + assertEquals(null, newStereochemistryEl.getAttributeValue(LOCANT_ATR)); + assertEquals("M", newStereochemistryEl.getAttributeValue(VALUE_ATR)); + assertEquals(AXIAL_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); + } + + @Test + public void testAxial2() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(Ra)"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(1, children.size()); + Element newStereochemistryEl = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); + assertEquals(null, newStereochemistryEl.getAttributeValue(LOCANT_ATR)); + assertEquals("Ra", newStereochemistryEl.getAttributeValue(VALUE_ATR)); + assertEquals(AXIAL_TYPE_VAL, 
newStereochemistryEl.getAttributeValue(TYPE_ATR)); + } + + @Test public void testZUnbracketted() throws ComponentGenerationException {//note that IUPAC mandates brackets //XML for Z,Z: - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "Z"); stereochem.addAttribute(new Attribute(TYPE_ATR, E_OR_Z_TYPE_VAL)); - stereochem.appendChild("Z"); - substituent.appendChild(stereochem); - stereochem = new Element(STEREOCHEMISTRY_EL); + substituent.addChild(stereochem); + stereochem = new TokenEl(STEREOCHEMISTRY_EL, "Z"); stereochem.addAttribute(new Attribute(TYPE_ATR, E_OR_Z_TYPE_VAL)); - stereochem.appendChild("Z"); - substituent.appendChild(stereochem); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(2, children.size()); Element modifiedStereochemistryEl1 = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getName()); assertEquals(null, modifiedStereochemistryEl1.getAttributeValue(LOCANT_ATR)); assertEquals("Z", modifiedStereochemistryEl1.getAttributeValue(VALUE_ATR)); assertEquals(E_OR_Z_TYPE_VAL, modifiedStereochemistryEl1.getAttributeValue(TYPE_ATR)); Element modifiedStereochemistryEl2 = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl2.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl2.getName()); assertEquals(null, modifiedStereochemistryEl2.getAttributeValue(LOCANT_ATR)); assertEquals("Z", modifiedStereochemistryEl2.getAttributeValue(VALUE_ATR)); assertEquals(E_OR_Z_TYPE_VAL, modifiedStereochemistryEl2.getAttributeValue(TYPE_ATR)); @@ -334,79 
+463,145 @@ @Test public void testEandZUnbrackettedLocanted() throws ComponentGenerationException {//note that IUPAC mandates brackets //XML for 2E,4Z: - Element substituent = new Element(SUBSTITUENT_EL); - Element locant = new Element(LOCANT_ATR); - locant.appendChild("2"); - substituent.appendChild(locant); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element locant = new TokenEl(LOCANT_EL, "2"); + substituent.addChild(locant); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "E"); stereochem.addAttribute(new Attribute(TYPE_ATR, E_OR_Z_TYPE_VAL)); - stereochem.appendChild("E"); - substituent.appendChild(stereochem); - locant = new Element(LOCANT_ATR); - locant.appendChild("4"); - substituent.appendChild(locant); - stereochem = new Element(STEREOCHEMISTRY_EL); + substituent.addChild(stereochem); + locant = new TokenEl(LOCANT_EL, "4"); + substituent.addChild(locant); + stereochem = new TokenEl(STEREOCHEMISTRY_EL, "Z"); stereochem.addAttribute(new Attribute(TYPE_ATR, E_OR_Z_TYPE_VAL)); - stereochem.appendChild("Z"); - substituent.appendChild(stereochem); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(2, children.size()); Element modifiedStereochemistryEl1 = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getName()); assertEquals("2", modifiedStereochemistryEl1.getAttributeValue(LOCANT_ATR)); assertEquals("E", modifiedStereochemistryEl1.getAttributeValue(VALUE_ATR)); assertEquals(E_OR_Z_TYPE_VAL, modifiedStereochemistryEl1.getAttributeValue(TYPE_ATR)); Element modifiedStereochemistryEl2 = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl2.getLocalName()); + 
assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl2.getName()); assertEquals("4", modifiedStereochemistryEl2.getAttributeValue(LOCANT_ATR)); assertEquals("Z", modifiedStereochemistryEl2.getAttributeValue(VALUE_ATR)); assertEquals(E_OR_Z_TYPE_VAL, modifiedStereochemistryEl2.getAttributeValue(TYPE_ATR)); } @Test + public void testEandZUnbrackettedBeforeEne() throws ComponentGenerationException {//not allowed in IUPAC names + //XML for 2E,4Z-diene: + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element locant = new TokenEl(LOCANT_EL, "2"); + substituent.addChild(locant); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "E"); + stereochem.addAttribute(new Attribute(TYPE_ATR, E_OR_Z_TYPE_VAL)); + substituent.addChild(stereochem); + locant = new TokenEl(LOCANT_EL, "4"); + substituent.addChild(locant); + stereochem = new TokenEl(STEREOCHEMISTRY_EL, "Z"); + stereochem.addAttribute(new Attribute(TYPE_ATR, E_OR_Z_TYPE_VAL)); + substituent.addChild(stereochem); + Element multiplier = new TokenEl(MULTIPLIER_EL, "di"); + multiplier.addAttribute(new Attribute(VALUE_ATR, "2")); + substituent.addChild(multiplier); + Element unsaturator = new TokenEl(UNSATURATOR_EL, "ene"); + unsaturator.addAttribute(new Attribute(VALUE_ATR, "2")); + substituent.addChild(unsaturator); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(5, children.size()); + Element modifiedStereochemistryEl1 = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getName()); + assertEquals("2", modifiedStereochemistryEl1.getAttributeValue(LOCANT_ATR)); + assertEquals("E", modifiedStereochemistryEl1.getAttributeValue(VALUE_ATR)); + assertEquals(E_OR_Z_TYPE_VAL, modifiedStereochemistryEl1.getAttributeValue(TYPE_ATR)); + + Element modifiedStereochemistryEl2 = children.get(1); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl2.getName()); + assertEquals("4", 
modifiedStereochemistryEl2.getAttributeValue(LOCANT_ATR)); + assertEquals("Z", modifiedStereochemistryEl2.getAttributeValue(VALUE_ATR)); + assertEquals(E_OR_Z_TYPE_VAL, modifiedStereochemistryEl2.getAttributeValue(TYPE_ATR)); + + Element newLocant = children.get(2); + assertEquals(LOCANT_EL, newLocant.getName()); + assertEquals("2,4", newLocant.getValue()); + assertEquals(MULTIPLIER_EL ,children.get(3).getName()); + assertEquals(UNSATURATOR_EL, children.get(4).getName()); + } + + @Test + public void testEandZUnbrackettedBeforeYlidene() throws ComponentGenerationException {//not allowed in IUPAC names + //XML for 2Z-ylidene: + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element locant = new TokenEl(LOCANT_EL, "2"); + substituent.addChild(locant); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "Z"); + stereochem.addAttribute(new Attribute(TYPE_ATR, E_OR_Z_TYPE_VAL)); + substituent.addChild(stereochem); + Element suffix = new TokenEl(SUFFIX_EL, "ylidene"); + suffix.addAttribute(new Attribute(VALUE_ATR, "ylidene")); + substituent.addChild(suffix); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(3, children.size()); + Element modifiedStereochemistryEl1 = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getName()); + assertEquals("2", modifiedStereochemistryEl1.getAttributeValue(LOCANT_ATR)); + assertEquals("Z", modifiedStereochemistryEl1.getAttributeValue(VALUE_ATR)); + assertEquals(E_OR_Z_TYPE_VAL, modifiedStereochemistryEl1.getAttributeValue(TYPE_ATR)); + + Element newLocant = children.get(1); + assertEquals(LOCANT_EL, newLocant.getName()); + assertEquals("2", newLocant.getValue()); + assertEquals(SUFFIX_EL ,children.get(2).getName()); + } + + @Test public void testBrackettedAlphaBeta() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new 
GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(1a,2b,3bEtA,4alpha,5xi)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(1a,2b,3bEtA,4alpha,5xi)"); - Element naturalProduct = new Element(GROUP_EL); + substituent.addChild(stereochem); + Element naturalProduct = new TokenEl(GROUP_EL); naturalProduct.addAttribute(new Attribute(SUBTYPE_ATR, BIOCHEMICAL_SUBTYPE_VAL)); naturalProduct.addAttribute(new Attribute(ALPHABETACLOCKWISEATOMORDERING_ATR, "")); - substituent.appendChild(naturalProduct); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(naturalProduct); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(6, children.size()); Element newStereochemistryEl = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("1", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("alpha", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); newStereochemistryEl = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("2", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("beta", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); newStereochemistryEl = children.get(2); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("3", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); 
assertEquals("beta", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); newStereochemistryEl = children.get(3); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("4", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("alpha", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); newStereochemistryEl = children.get(4); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("5", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("xi", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); @@ -415,27 +610,26 @@ @Test public void testAlphaBeta() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "3beta,5alpha"); stereochem.addAttribute(new Attribute(TYPE_ATR, ALPHA_OR_BETA_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("3beta,5alpha"); - Element naturalProduct = new Element(GROUP_EL); + substituent.addChild(stereochem); + Element naturalProduct = new TokenEl(GROUP_EL); naturalProduct.addAttribute(new Attribute(SUBTYPE_ATR, BIOCHEMICAL_SUBTYPE_VAL)); naturalProduct.addAttribute(new Attribute(ALPHABETACLOCKWISEATOMORDERING_ATR, "")); - substituent.appendChild(naturalProduct); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(naturalProduct); + processStereochemistry(substituent); - Elements children = 
substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(3, children.size()); Element newStereochemistryEl = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("3", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("beta", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); newStereochemistryEl = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("5", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("alpha", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); @@ -443,51 +637,49 @@ @Test public void testAlphaBetaNotDirectlyPrecedingANaturalProduct1() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "3beta,5alpha"); stereochem.addAttribute(new Attribute(TYPE_ATR, ALPHA_OR_BETA_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("3beta,5alpha"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(3, children.size()); Element newStereochemistryEl = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("3", 
newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("beta", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); newStereochemistryEl = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("5", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("alpha", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); Element newLocantEl = children.get(2); - assertEquals(LOCANT_EL, newLocantEl.getLocalName()); + assertEquals(LOCANT_EL, newLocantEl.getName()); assertEquals("3,5", newLocantEl.getValue()); } @Test public void testAlphaBetaNotDirectlyPrecedingANaturalProduct2() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(3beta,5alpha)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(3beta,5alpha)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(2, children.size()); Element newStereochemistryEl = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("3", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("beta", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, 
newStereochemistryEl.getAttributeValue(TYPE_ATR)); newStereochemistryEl = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("5", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("alpha", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); @@ -495,96 +687,93 @@ @Test public void testAlphaBetaNotDirectlyPrecedingANaturalProduct3() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element naturalProduct = new Element(GROUP_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element naturalProduct = new TokenEl(GROUP_EL); naturalProduct.addAttribute(new Attribute(SUBTYPE_ATR, BIOCHEMICAL_SUBTYPE_VAL)); naturalProduct.addAttribute(new Attribute(ALPHABETACLOCKWISEATOMORDERING_ATR, "")); - substituent.appendChild(naturalProduct); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + substituent.addChild(naturalProduct); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "3beta,5alpha"); stereochem.addAttribute(new Attribute(TYPE_ATR, ALPHA_OR_BETA_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("3beta,5alpha"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(4, children.size()); Element newStereochemistryEl = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("3", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("beta", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, 
newStereochemistryEl.getAttributeValue(TYPE_ATR)); newStereochemistryEl = children.get(2); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("5", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("alpha", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); Element newLocantEl = children.get(3); - assertEquals(LOCANT_EL, newLocantEl.getLocalName()); + assertEquals(LOCANT_EL, newLocantEl.getName()); assertEquals("3,5", newLocantEl.getValue()); } @Test public void testAlphaBetaStereoMixedWithNormalLocants() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "3beta,4,10,12alpha"); stereochem.addAttribute(new Attribute(TYPE_ATR, ALPHA_OR_BETA_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("3beta,4,10,12alpha"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(3, children.size()); Element newStereochemistryEl = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); assertEquals("3", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("beta", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); newStereochemistryEl = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, 
newStereochemistryEl.getName()); assertEquals("12", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); assertEquals("alpha", newStereochemistryEl.getAttributeValue(VALUE_ATR)); assertEquals(ALPHA_OR_BETA_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); Element newLocantEl = children.get(2); - assertEquals(LOCANT_EL, newLocantEl.getLocalName()); + assertEquals(LOCANT_EL, newLocantEl.getName()); assertEquals("3,4,10,12", newLocantEl.getValue()); } //relative stereochemistry is currently treated the same as absolute stereochemistry @Test public void testRelativeStereoChemistry1() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "rel-(1R,3S,4S,7R)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("rel-(1R,3S,4S,7R)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(4, children.size()); Element newStereochemistryEl1 = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getName()); assertEquals("1", newStereochemistryEl1.getAttributeValue(LOCANT_ATR)); assertEquals("R", newStereochemistryEl1.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl1.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl2 = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getName()); assertEquals("3", newStereochemistryEl2.getAttributeValue(LOCANT_ATR)); 
assertEquals("S", newStereochemistryEl2.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl2.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl3 = children.get(2); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl3.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl3.getName()); assertEquals("4", newStereochemistryEl3.getAttributeValue(LOCANT_ATR)); assertEquals("S", newStereochemistryEl3.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl3.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl4 = children.get(3); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl4.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl4.getName()); assertEquals("7", newStereochemistryEl4.getAttributeValue(LOCANT_ATR)); assertEquals("R", newStereochemistryEl4.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl4.getAttributeValue(TYPE_ATR)); @@ -592,103 +781,319 @@ @Test public void testRelativeStereoChemistry2() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(1R*,3S*,4S*,7R*)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(1R*,3S*,4S*,7R*)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(4, children.size()); Element newStereochemistryEl1 = children.get(0); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getName()); assertEquals("1", 
newStereochemistryEl1.getAttributeValue(LOCANT_ATR)); assertEquals("R", newStereochemistryEl1.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl1.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl2 = children.get(1); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getName()); assertEquals("3", newStereochemistryEl2.getAttributeValue(LOCANT_ATR)); assertEquals("S", newStereochemistryEl2.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl2.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl3 = children.get(2); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl3.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl3.getName()); assertEquals("4", newStereochemistryEl3.getAttributeValue(LOCANT_ATR)); assertEquals("S", newStereochemistryEl3.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl3.getAttributeValue(TYPE_ATR)); Element newStereochemistryEl4 = children.get(3); - assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl4.getLocalName()); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl4.getName()); assertEquals("7", newStereochemistryEl4.getAttributeValue(LOCANT_ATR)); assertEquals("R", newStereochemistryEl4.getAttributeValue(VALUE_ATR)); assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl4.getAttributeValue(TYPE_ATR)); } - //racemates are currently treated identically to completely undefined @Test - public void testRacemate1() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + public void testRelativeStereoChemistry3() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "rel-"); stereochem.addAttribute(new Attribute(TYPE_ATR, 
STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("rac-(2R)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(0, children.size()); } + //relativeCisTrans is only supported sufficiently to get constitutionally correct results i.e. locants extracted from the stereochemistry + @Test + public void testRelativeCisTrans() throws ComponentGenerationException { + //c-4- + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "c-4-"); + stereochem.addAttribute(new Attribute(TYPE_ATR, RELATIVECISTRANS_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(2, children.size()); + Element modifiedStereochemistryEl1 = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getName()); + assertEquals(null, modifiedStereochemistryEl1.getAttributeValue(LOCANT_ATR)); + assertEquals("c-4-", modifiedStereochemistryEl1.getValue()); + assertEquals(RELATIVECISTRANS_TYPE_VAL, modifiedStereochemistryEl1.getAttributeValue(TYPE_ATR)); + Element locant = children.get(1); + assertEquals(LOCANT_EL, locant.getName()); + assertEquals("4", locant.getValue()); + } + + @Test + public void testRacemate1() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "rac-(2R)"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(1, children.size()); + Element newStereochemistryEl = children.get(0); + 
assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); + assertEquals("2", newStereochemistryEl.getAttributeValue(LOCANT_ATR)); + assertEquals("RS", newStereochemistryEl.getAttributeValue(VALUE_ATR)); + assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); + } + @Test public void testRacemate2() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(RS)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(RS)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); - assertEquals(0, children.size()); + List children = substituent.getChildElements(); + assertEquals(1, children.size()); + Element newStereochemistryEl = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); + assertEquals(null, newStereochemistryEl.getAttributeValue(LOCANT_ATR)); + assertEquals("RS", newStereochemistryEl.getAttributeValue(VALUE_ATR)); + assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); + } + + @Test + public void testRacemate2_ci() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(rs)"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(1, children.size()); + Element newStereochemistryEl = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); + 
assertEquals(null, newStereochemistryEl.getAttributeValue(LOCANT_ATR)); + assertEquals("RS", newStereochemistryEl.getAttributeValue(VALUE_ATR)); + assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); } @Test public void testRacemate3() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(SR)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(RS)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); - assertEquals(0, children.size()); + List children = substituent.getChildElements(); + assertEquals(1, children.size()); + Element newStereochemistryEl = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); + assertEquals(null, newStereochemistryEl.getAttributeValue(LOCANT_ATR)); + assertEquals("SR", newStereochemistryEl.getAttributeValue(VALUE_ATR)); + assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); } @Test public void testRacemate4() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "rac-(2R,4S)"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("rac-(2R,4R)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = 
substituent.getChildElements(); - assertEquals(0, children.size()); + List children = substituent.getChildElements(); + assertEquals(2, children.size()); + Element newStereochemistryEl1 = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getName()); + assertEquals("2", newStereochemistryEl1.getAttributeValue(LOCANT_ATR)); + assertEquals("RS", newStereochemistryEl1.getAttributeValue(VALUE_ATR)); + assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl1.getAttributeValue(TYPE_ATR)); + + Element newStereochemistryEl2 = children.get(1); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getName()); + assertEquals("4", newStereochemistryEl2.getAttributeValue(LOCANT_ATR)); + assertEquals("SR", newStereochemistryEl2.getAttributeValue(VALUE_ATR)); + assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl2.getAttributeValue(TYPE_ATR)); } @Test public void testRacemate5() throws ComponentGenerationException { - Element substituent = new Element(SUBSTITUENT_EL); - Element stereochem = new Element(STEREOCHEMISTRY_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(2RS,4SR)"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(2, children.size()); + Element newStereochemistryEl1 = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl1.getName()); + assertEquals("2", newStereochemistryEl1.getAttributeValue(LOCANT_ATR)); + assertEquals("RS", newStereochemistryEl1.getAttributeValue(VALUE_ATR)); + assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl1.getAttributeValue(TYPE_ATR)); + + Element newStereochemistryEl2 = children.get(1); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl2.getName()); + assertEquals("4", newStereochemistryEl2.getAttributeValue(LOCANT_ATR)); + assertEquals("SR", 
newStereochemistryEl2.getAttributeValue(VALUE_ATR)); + assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl2.getAttributeValue(TYPE_ATR)); + } + + @Test + public void testRacemate6() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "rac-"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(0, children.size()); + } + + @Test + public void testRacemate7() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "racem-"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(0, children.size()); + } + + @Test + public void testRacemate8() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "racemic-"); stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); - substituent.appendChild(stereochem); - stereochem.appendChild("(2RS,4RS)"); - ComponentGenerator.processStereochemistry(substituent); + substituent.addChild(stereochem); + processStereochemistry(substituent); - Elements children = substituent.getChildElements(); + List children = substituent.getChildElements(); assertEquals(0, children.size()); } + + @Test + public void testRacemate9() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(R/S)-"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); 
+ processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(1, children.size()); + Element newStereochemistryEl = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, newStereochemistryEl.getName()); + assertEquals(null, newStereochemistryEl.getAttributeValue(LOCANT_ATR)); + assertEquals("RS", newStereochemistryEl.getAttributeValue(VALUE_ATR)); + assertEquals(R_OR_S_TYPE_VAL, newStereochemistryEl.getAttributeValue(TYPE_ATR)); + } + + @Test + public void testRacemate10() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(RAC)"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(0, children.size()); + } + + @Test + public void testRacemateEz1() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(EZ)"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + Element modifiedStereochemistryEl1 = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getName()); + assertEquals("EZ", modifiedStereochemistryEl1.getAttributeValue(VALUE_ATR)); + assertEquals(E_OR_Z_TYPE_VAL, modifiedStereochemistryEl1.getAttributeValue(TYPE_ATR)); + } + + @Test + public void testRacemateEz2() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "(2EZ)"); + stereochem.addAttribute(new Attribute(TYPE_ATR, STEREOCHEMISTRYBRACKET_TYPE_VAL)); + substituent.addChild(stereochem); + 
processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(1, children.size()); + Element modifiedStereochemistryEl1 = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getName()); + assertEquals("2", modifiedStereochemistryEl1.getAttributeValue(LOCANT_ATR)); + assertEquals("EZ", modifiedStereochemistryEl1.getAttributeValue(VALUE_ATR)); + assertEquals(E_OR_Z_TYPE_VAL, modifiedStereochemistryEl1.getAttributeValue(TYPE_ATR)); + } + + @Test + public void testRacemateEz3_unbracketted() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element locant = new TokenEl(LOCANT_EL, "2"); + substituent.addChild(locant); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "ez"); + stereochem.addAttribute(new Attribute(TYPE_ATR, E_OR_Z_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + assertEquals(1, children.size()); + Element modifiedStereochemistryEl1 = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getName()); + assertEquals("2", modifiedStereochemistryEl1.getAttributeValue(LOCANT_ATR)); + assertEquals("EZ", modifiedStereochemistryEl1.getAttributeValue(VALUE_ATR)); + assertEquals(E_OR_Z_TYPE_VAL, modifiedStereochemistryEl1.getAttributeValue(TYPE_ATR)); + } + + @Test + public void testRacemateEz4_unbracketted() throws ComponentGenerationException { + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element stereochem = new TokenEl(STEREOCHEMISTRY_EL, "EZ"); + stereochem.addAttribute(new Attribute(TYPE_ATR, E_OR_Z_TYPE_VAL)); + substituent.addChild(stereochem); + processStereochemistry(substituent); + + List children = substituent.getChildElements(); + Element modifiedStereochemistryEl1 = children.get(0); + assertEquals(STEREOCHEMISTRY_EL, modifiedStereochemistryEl1.getName()); + assertEquals("EZ", 
modifiedStereochemistryEl1.getAttributeValue(VALUE_ATR)); + assertEquals(E_OR_Z_TYPE_VAL, modifiedStereochemistryEl1.getAttributeValue(TYPE_ATR)); + } + + private void processStereochemistry(Element subOrRoot) throws ComponentGenerationException { + new ComponentGenerator(new NameToStructureConfig()).processStereochemistry(subOrRoot); + } } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessorTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessorTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessorTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ComponentProcessorTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,120 +1,119 @@ package uk.ac.cam.ch.wwmm.opsin; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; -import static junit.framework.Assert.*; -import nu.xom.Attribute; -import nu.xom.Element; -import static org.mockito.Mockito.mock; import org.junit.Test; +import static org.junit.Assert.*; +import static org.mockito.Mockito.mock; + public class ComponentProcessorTest { @Test(expected=ComponentGenerationException.class) public void testSubtractiveWithNoGroupToAttachTo() throws ComponentGenerationException{ - Element word = new Element(WORD_EL); - Element substituent = new Element(SUBSTITUENT_EL); - word.appendChild(substituent); - Element substractivePrefix = new Element(SUBTRACTIVEPREFIX_EL); + Element word = new GroupingEl(WORD_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + word.addChild(substituent); + Element substractivePrefix = new TokenEl(SUBTRACTIVEPREFIX_EL); substractivePrefix.addAttribute(new Attribute(TYPE_ATR, DEOXY_TYPE_VAL)); - substituent.appendChild(substractivePrefix); + substituent.addChild(substractivePrefix); ComponentProcessor.removeAndMoveToAppropriateGroupIfSubtractivePrefix(substituent); } @Test public void 
testSubtractiveWithBiochemicalToAttachTo() throws ComponentGenerationException{ - Element word = new Element(WORD_EL); - Element substituent = new Element(SUBSTITUENT_EL); - Element substractivePrefix = new Element(SUBTRACTIVEPREFIX_EL); + Element word = new GroupingEl(WORD_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element substractivePrefix = new TokenEl(SUBTRACTIVEPREFIX_EL); substractivePrefix.addAttribute(new Attribute(TYPE_ATR, DEOXY_TYPE_VAL)); - substituent.appendChild(substractivePrefix); - word.appendChild(substituent); - Element root = new Element(ROOT_EL); - word.appendChild(root); - Element group = new Element(GROUP_EL); + substituent.addChild(substractivePrefix); + word.addChild(substituent); + Element root = new GroupingEl(ROOT_EL); + word.addChild(root); + Element group = new TokenEl(GROUP_EL); group.addAttribute(new Attribute(SUBTYPE_ATR, BIOCHEMICAL_SUBTYPE_VAL)); - root.appendChild(group); + root.addChild(group); ComponentProcessor.removeAndMoveToAppropriateGroupIfSubtractivePrefix(substituent); assertEquals("Substractive prefix should of been detached", null, substituent.getParent()); - assertEquals(2, root.getChildElements().size()); + assertEquals(2, root.getChildCount()); assertEquals(substractivePrefix, root.getChildElements().get(0)); } @Test public void testSubtractiveRightMostPreferred() throws ComponentGenerationException{ - Element word = new Element(WORD_EL); - Element substituent = new Element(SUBSTITUENT_EL); - Element substractivePrefix = new Element(SUBTRACTIVEPREFIX_EL); + Element word = new GroupingEl(WORD_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element substractivePrefix = new TokenEl(SUBTRACTIVEPREFIX_EL); substractivePrefix.addAttribute(new Attribute(TYPE_ATR, DEOXY_TYPE_VAL)); - substituent.appendChild(substractivePrefix); - word.appendChild(substituent); - Element substituent2 = new Element(SUBSTITUENT_EL); - Element group1 = new Element(GROUP_EL); + 
substituent.addChild(substractivePrefix); + word.addChild(substituent); + Element substituent2 = new GroupingEl(SUBSTITUENT_EL); + Element group1 = new TokenEl(GROUP_EL); group1.addAttribute(new Attribute(TYPE_ATR, SIMPLEGROUP_SUBTYPE_VAL)); group1.addAttribute(new Attribute(SUBTYPE_ATR, SIMPLEGROUP_SUBTYPE_VAL)); - substituent2.appendChild(group1); - word.appendChild(substituent2); - Element root = new Element(ROOT_EL); - word.appendChild(root); - Element group2 = new Element(GROUP_EL); + substituent2.addChild(group1); + word.addChild(substituent2); + Element root = new GroupingEl(ROOT_EL); + word.addChild(root); + Element group2 = new TokenEl(GROUP_EL); group2.addAttribute(new Attribute(TYPE_ATR, SIMPLEGROUP_SUBTYPE_VAL)); group2.addAttribute(new Attribute(SUBTYPE_ATR, BIOCHEMICAL_SUBTYPE_VAL)); - root.appendChild(group2); + root.addChild(group2); ComponentProcessor.removeAndMoveToAppropriateGroupIfSubtractivePrefix(substituent); assertEquals("Substractive prefix should of been detached", null, substituent.getParent()); - assertEquals(2, root.getChildElements().size()); + assertEquals(2, root.getChildCount()); assertEquals(substractivePrefix, root.getChildElements().get(0)); } @Test public void testSubtractiveBiochemicalPreferredToRightMost() throws ComponentGenerationException{ - Element word = new Element(WORD_EL); - Element substituent = new Element(SUBSTITUENT_EL); - Element substractivePrefix = new Element(SUBTRACTIVEPREFIX_EL); + Element word = new GroupingEl(WORD_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element substractivePrefix = new TokenEl(SUBTRACTIVEPREFIX_EL); substractivePrefix.addAttribute(new Attribute(TYPE_ATR, DEOXY_TYPE_VAL)); - substituent.appendChild(substractivePrefix); - word.appendChild(substituent); - Element substituent2 = new Element(SUBSTITUENT_EL); - Element group1 = new Element(GROUP_EL); + substituent.addChild(substractivePrefix); + word.addChild(substituent); + Element substituent2 = new 
GroupingEl(SUBSTITUENT_EL); + Element group1 = new TokenEl(GROUP_EL); group1.addAttribute(new Attribute(SUBTYPE_ATR, BIOCHEMICAL_SUBTYPE_VAL)); - substituent2.appendChild(group1); - word.appendChild(substituent2); - Element root = new Element(ROOT_EL); - word.appendChild(root); - Element group2 = new Element(GROUP_EL); + substituent2.addChild(group1); + word.addChild(substituent2); + Element root = new GroupingEl(ROOT_EL); + word.addChild(root); + Element group2 = new TokenEl(GROUP_EL); group2.addAttribute(new Attribute(SUBTYPE_ATR, SIMPLEGROUP_SUBTYPE_VAL)); - root.appendChild(group2); + root.addChild(group2); ComponentProcessor.removeAndMoveToAppropriateGroupIfSubtractivePrefix(substituent); assertEquals("Substractive prefix should of been detached", null, substituent.getParent()); - assertEquals(1, root.getChildElements().size()); - assertEquals(2, substituent2.getChildElements().size()); + assertEquals(1, root.getChildCount()); + assertEquals(2, substituent2.getChildCount()); assertEquals(substractivePrefix, substituent2.getChildElements().get(0)); } @Test public void testSubtractiveWithMultiplierAndLocants() throws ComponentGenerationException{ - Element word = new Element(WORD_EL); - Element substituent = new Element(SUBSTITUENT_EL); - Element locant = new Element(LOCANT_EL); - substituent.appendChild(locant); - Element multiplier = new Element(MULTIPLIER_EL); - substituent.appendChild(multiplier); - Element substractivePrefix = new Element(SUBTRACTIVEPREFIX_EL); + Element word = new GroupingEl(WORD_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + Element locant = new TokenEl(LOCANT_EL); + substituent.addChild(locant); + Element multiplier = new TokenEl(MULTIPLIER_EL); + substituent.addChild(multiplier); + Element substractivePrefix = new TokenEl(SUBTRACTIVEPREFIX_EL); substractivePrefix.addAttribute(new Attribute(TYPE_ATR, DEOXY_TYPE_VAL)); - substituent.appendChild(substractivePrefix); - word.appendChild(substituent); - Element root = new 
Element(ROOT_EL); - word.appendChild(root); - Element group = new Element(GROUP_EL); + substituent.addChild(substractivePrefix); + word.addChild(substituent); + Element root = new GroupingEl(ROOT_EL); + word.addChild(root); + Element group = new TokenEl(GROUP_EL); group.addAttribute(new Attribute(SUBTYPE_ATR, BIOCHEMICAL_SUBTYPE_VAL)); - root.appendChild(group); + root.addChild(group); ComponentProcessor.removeAndMoveToAppropriateGroupIfSubtractivePrefix(substituent); assertEquals("Substractive prefix should of been detached", null, substituent.getParent()); - assertEquals(4, root.getChildElements().size()); + assertEquals(4, root.getChildCount()); assertEquals(locant, root.getChildElements().get(0)); assertEquals(multiplier, root.getChildElements().get(1)); assertEquals(substractivePrefix, root.getChildElements().get(2)); @@ -122,102 +121,102 @@ @Test public void testDLStereochemistryLOnAminoAcid() throws ComponentGenerationException, StructureBuildingException{ - BuildState state = new BuildState(mock(NameToStructureConfig.class), new SMILESFragmentBuilder()); + BuildState state = new BuildState(mock(NameToStructureConfig.class)); Fragment f = state.fragManager.buildSMILES("N[C@@H](C)C"); - Element aminoAcidEl = new Element(GROUP_EL); - state.xmlFragmentMap.put(aminoAcidEl, f); + Element aminoAcidEl = new TokenEl(GROUP_EL); + aminoAcidEl.setFrag(f); int parityBefore = f.getAtomByID(2).getAtomParity().getParity(); - ComponentProcessor processor = new ComponentProcessor(mock(SuffixRules.class), state, mock(Element.class)); - processor.applyDlStereochemistryToAminoAcid(aminoAcidEl, "l"); + ComponentProcessor processor = new ComponentProcessor(state, mock(SuffixApplier.class)); + assertEquals(true, processor.applyDlStereochemistryToAminoAcid(aminoAcidEl, "l")); assertEquals(parityBefore, f.getAtomByID(2).getAtomParity().getParity()); } @Test public void testDLStereochemistryDOnAminoAcid() throws ComponentGenerationException, StructureBuildingException{ - BuildState 
state = new BuildState(mock(NameToStructureConfig.class), new SMILESFragmentBuilder()); + BuildState state = new BuildState(mock(NameToStructureConfig.class)); Fragment f = state.fragManager.buildSMILES("N[C@@H](C)C"); - Element aminoAcidEl = new Element(GROUP_EL); - state.xmlFragmentMap.put(aminoAcidEl, f); + Element aminoAcidEl = new TokenEl(GROUP_EL); + aminoAcidEl.setFrag(f); int parityBefore = f.getAtomByID(2).getAtomParity().getParity(); - ComponentProcessor processor = new ComponentProcessor(mock(SuffixRules.class), state, mock(Element.class)); - processor.applyDlStereochemistryToAminoAcid(aminoAcidEl, "d"); + ComponentProcessor processor = new ComponentProcessor(state, mock(SuffixApplier.class)); + assertEquals(true, processor.applyDlStereochemistryToAminoAcid(aminoAcidEl, "d")); assertEquals(parityBefore, -f.getAtomByID(2).getAtomParity().getParity()); } @Test public void testDLStereochemistryDLOnAminoAcid() throws ComponentGenerationException, StructureBuildingException{ - BuildState state = new BuildState(mock(NameToStructureConfig.class), new SMILESFragmentBuilder()); + BuildState state = new BuildState(mock(NameToStructureConfig.class)); Fragment f = state.fragManager.buildSMILES("N[C@@H](C)C"); - Element aminoAcidEl = new Element(GROUP_EL); - state.xmlFragmentMap.put(aminoAcidEl, f); - ComponentProcessor processor = new ComponentProcessor(mock(SuffixRules.class), state, mock(Element.class)); - processor.applyDlStereochemistryToAminoAcid(aminoAcidEl, "dl"); + Element aminoAcidEl = new TokenEl(GROUP_EL); + aminoAcidEl.setFrag(f); + ComponentProcessor processor = new ComponentProcessor(state, mock(SuffixApplier.class)); + assertEquals(true, processor.applyDlStereochemistryToAminoAcid(aminoAcidEl, "dl")); assertEquals(null, f.getAtomByID(2).getAtomParity()); } - @Test(expected=ComponentGenerationException.class) + @Test public void testDLStereochemistryDOnAchiralAminoAcid() throws ComponentGenerationException, StructureBuildingException{ - BuildState 
state = new BuildState(mock(NameToStructureConfig.class), new SMILESFragmentBuilder()); + BuildState state = new BuildState(mock(NameToStructureConfig.class)); Fragment f = state.fragManager.buildSMILES("NC(C)C"); - Element aminoAcidEl = new Element(GROUP_EL); - state.xmlFragmentMap.put(aminoAcidEl, f); - ComponentProcessor processor = new ComponentProcessor(mock(SuffixRules.class), state, mock(Element.class)); - processor.applyDlStereochemistryToAminoAcid(aminoAcidEl, "d"); + Element aminoAcidEl = new TokenEl(GROUP_EL); + aminoAcidEl.setFrag(f); + ComponentProcessor processor = new ComponentProcessor(state, mock(SuffixApplier.class)); + assertEquals(false, processor.applyDlStereochemistryToAminoAcid(aminoAcidEl, "d")); } @Test public void testDLStereochemistryLOnCarbohydrate() throws ComponentGenerationException, StructureBuildingException{ - BuildState state = new BuildState(mock(NameToStructureConfig.class), new SMILESFragmentBuilder()); + BuildState state = new BuildState(mock(NameToStructureConfig.class)); Fragment f = state.fragManager.buildSMILES("N[C@@H](C)C"); - Element carbohydrateEl = new Element(GROUP_EL); - state.xmlFragmentMap.put(carbohydrateEl, f); + Element carbohydrateEl = new TokenEl(GROUP_EL); + carbohydrateEl.setFrag(f); int parityBefore = f.getAtomByID(2).getAtomParity().getParity(); - ComponentProcessor processor = new ComponentProcessor(mock(SuffixRules.class), state, mock(Element.class)); + ComponentProcessor processor = new ComponentProcessor(state, mock(SuffixApplier.class)); processor.applyDlStereochemistryToCarbohydrate(carbohydrateEl, "l"); assertEquals(parityBefore, -f.getAtomByID(2).getAtomParity().getParity()); } @Test public void testDLStereochemistryDOnCarbohydrate() throws ComponentGenerationException, StructureBuildingException{ - BuildState state = new BuildState(mock(NameToStructureConfig.class), new SMILESFragmentBuilder()); + BuildState state = new BuildState(mock(NameToStructureConfig.class)); Fragment f = 
state.fragManager.buildSMILES("N[C@@H](C)C"); - Element carbohydrateEl = new Element(GROUP_EL); - state.xmlFragmentMap.put(carbohydrateEl, f); + Element carbohydrateEl = new TokenEl(GROUP_EL); + carbohydrateEl.setFrag(f); int parityBefore = f.getAtomByID(2).getAtomParity().getParity(); - ComponentProcessor processor = new ComponentProcessor(mock(SuffixRules.class), state, mock(Element.class)); + ComponentProcessor processor = new ComponentProcessor(state, mock(SuffixApplier.class)); processor.applyDlStereochemistryToCarbohydrate(carbohydrateEl, "d"); assertEquals(parityBefore, f.getAtomByID(2).getAtomParity().getParity()); } @Test public void testDLStereochemistryInvertedNaturalOnCarbohydrate1() throws ComponentGenerationException, StructureBuildingException{ - BuildState state = new BuildState(mock(NameToStructureConfig.class), new SMILESFragmentBuilder()); + BuildState state = new BuildState(mock(NameToStructureConfig.class)); Fragment f = state.fragManager.buildSMILES("N[C@@H](C)C"); - Element carbohydrateEl = new Element(GROUP_EL); + Element carbohydrateEl = new TokenEl(GROUP_EL); carbohydrateEl.addAttribute(new Attribute(NATURALENTISOPPOSITE_ATR, "yes")); - state.xmlFragmentMap.put(carbohydrateEl, f); + carbohydrateEl.setFrag(f); int parityBefore = f.getAtomByID(2).getAtomParity().getParity(); - ComponentProcessor processor = new ComponentProcessor(mock(SuffixRules.class), state, mock(Element.class)); + ComponentProcessor processor = new ComponentProcessor(state, mock(SuffixApplier.class)); processor.applyDlStereochemistryToCarbohydrate(carbohydrateEl, "l"); assertEquals(parityBefore, f.getAtomByID(2).getAtomParity().getParity()); } @Test public void testDLStereochemistryInvertedNaturalOnCarbohydrate2() throws ComponentGenerationException, StructureBuildingException{ - BuildState state = new BuildState(mock(NameToStructureConfig.class), new SMILESFragmentBuilder()); + BuildState state = new BuildState(mock(NameToStructureConfig.class)); Fragment f = 
state.fragManager.buildSMILES("N[C@@H](C)C"); - Element carbohydrateEl = new Element(GROUP_EL); + Element carbohydrateEl = new TokenEl(GROUP_EL); carbohydrateEl.addAttribute(new Attribute(NATURALENTISOPPOSITE_ATR, "yes")); - state.xmlFragmentMap.put(carbohydrateEl, f); + carbohydrateEl.setFrag(f); int parityBefore = f.getAtomByID(2).getAtomParity().getParity(); - ComponentProcessor processor = new ComponentProcessor(mock(SuffixRules.class), state, mock(Element.class)); + ComponentProcessor processor = new ComponentProcessor(state, mock(SuffixApplier.class)); processor.applyDlStereochemistryToCarbohydrate(carbohydrateEl, "d"); assertEquals(parityBefore, -f.getAtomByID(2).getAtomParity().getParity()); } @Test public void testDStereochemistryDOnCarbohydratePrefix() throws ComponentGenerationException, StructureBuildingException{ - Element prefix = new Element(STEREOCHEMISTRY_EL); + Element prefix = new TokenEl(STEREOCHEMISTRY_EL); prefix.addAttribute(new Attribute(TYPE_ATR, CARBOHYDRATECONFIGURATIONPREFIX_TYPE_VAL)); prefix.addAttribute(new Attribute(VALUE_ATR, "l/r"));//D-threo ComponentProcessor.applyDlStereochemistryToCarbohydrateConfigurationalPrefix(prefix, "d"); @@ -226,7 +225,7 @@ @Test public void testLStereochemistryDOnCarbohydratePrefix() throws ComponentGenerationException, StructureBuildingException{ - Element prefix = new Element(STEREOCHEMISTRY_EL); + Element prefix = new TokenEl(STEREOCHEMISTRY_EL); prefix.addAttribute(new Attribute(TYPE_ATR, CARBOHYDRATECONFIGURATIONPREFIX_TYPE_VAL)); prefix.addAttribute(new Attribute(VALUE_ATR, "r/l")); ComponentProcessor.applyDlStereochemistryToCarbohydrateConfigurationalPrefix(prefix, "l"); @@ -235,7 +234,7 @@ @Test public void testDLStereochemistryDOnCarbohydratePrefix() throws ComponentGenerationException, StructureBuildingException{ - Element prefix = new Element(STEREOCHEMISTRY_EL); + Element prefix = new TokenEl(STEREOCHEMISTRY_EL); prefix.addAttribute(new Attribute(TYPE_ATR, 
CARBOHYDRATECONFIGURATIONPREFIX_TYPE_VAL)); prefix.addAttribute(new Attribute(VALUE_ATR, "l/r")); ComponentProcessor.applyDlStereochemistryToCarbohydrateConfigurationalPrefix(prefix, "dl"); diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/CycleDetectorTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/CycleDetectorTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/CycleDetectorTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/CycleDetectorTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,37 +1,30 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.assertEquals; +import static org.junit.Assert.*; import java.util.HashSet; import java.util.List; import java.util.Set; -import org.junit.Before; import org.junit.Test; import uk.ac.cam.ch.wwmm.opsin.Fragment; //Cycle detection is performed as part of fragment creation so we can just check the output of fragment creation public class CycleDetectorTest { - private FragmentManager fm; - - @Before - public void setup(){ - fm = new FragmentManager(new SMILESFragmentBuilder(), new IDManager()); - } + private SMILESFragmentBuilder sBuilder = new SMILESFragmentBuilder(new IDManager()); @Test public void testAssignCyclic1() throws StructureBuildingException { - Fragment frag = fm.buildSMILES("CCCC"); + Fragment frag = sBuilder.build("CCCC"); for (Atom a : frag.getAtomList()) { assertEquals("Should be acylic", false, a.getAtomIsInACycle()); } } - - + @Test public void testAssignCyclic2() throws StructureBuildingException { - Fragment frag = fm.buildSMILES("c1ccccc1"); + Fragment frag = sBuilder.build("c1ccccc1"); for (Atom a : frag.getAtomList()) { assertEquals("Should be cylic", true, a.getAtomIsInACycle()); } @@ -39,7 +32,7 @@ @Test public void testAssignCyclic3() throws StructureBuildingException { - Fragment frag = fm.buildSMILES("c12.c23.c34.c45.c56.c61"); + Fragment frag = 
sBuilder.build("c12.c23.c34.c45.c56.c61"); for (Atom a : frag.getAtomList()) { assertEquals("Should be cylic", true, a.getAtomIsInACycle()); } @@ -47,7 +40,7 @@ @Test public void testAssignCyclic4() throws StructureBuildingException { - Fragment frag = fm.buildSMILES("c1ccccc1CCc1ccccc1"); + Fragment frag = sBuilder.build("c1ccccc1CCc1ccccc1"); List atomList = frag.getAtomList(); for (int i = 0; i < atomList.size(); i++) { Atom a = atomList.get(i); @@ -62,7 +55,7 @@ @Test public void testAssignCyclic5() throws StructureBuildingException { - Fragment frag = fm.buildSMILES("CCc1ccc(O)cc1"); + Fragment frag = sBuilder.build("CCc1ccc(O)cc1"); List atomList = frag.getAtomList(); for (int i = 0; i < atomList.size(); i++) { Atom a = atomList.get(i); @@ -77,7 +70,7 @@ @Test public void testAssignCyclic6() throws StructureBuildingException { - Fragment frag = fm.buildSMILES("CC1CC(O1)C"); + Fragment frag = sBuilder.build("CC1CC(O1)C"); List atomList = frag.getAtomList(); for (int i = 0; i < atomList.size(); i++) { Atom a = atomList.get(i); @@ -92,7 +85,7 @@ @Test public void testFindPathBetweenAtoms1() throws StructureBuildingException { - Fragment frag = fm.buildSMILES("c1ccccc1"); + Fragment frag = sBuilder.build("c1ccccc1"); List atomList = frag.getAtomList(); List> paths = CycleDetector.getPathBetweenAtomsUsingBonds(atomList.get(0), atomList.get(3), frag.getBondSet()); assertEquals(2, paths.size()); @@ -112,7 +105,7 @@ @Test public void testFindPathBetweenAtoms2() throws StructureBuildingException { - Fragment frag = fm.buildSMILES("C1CCCC2CCCCC12"); + Fragment frag = sBuilder.build("C1CCCC2CCCCC12"); List atomList = frag.getAtomList(); Set bonds = new HashSet(frag.getBondSet()); bonds.remove(atomList.get(4).getBondToAtom(atomList.get(9))); @@ -142,7 +135,7 @@ @Test public void testFindPathBetweenAtoms3() throws StructureBuildingException { - Fragment frag = fm.buildSMILES("C1(C)CCCC2C(C)CCCC12"); + Fragment frag = sBuilder.build("C1(C)CCCC2C(C)CCCC12"); List atomList = 
frag.getAtomList(); Set bonds = new HashSet(frag.getBondSet()); bonds.remove(atomList.get(0).getBondToAtom(atomList.get(1))); diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/DtdTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/DtdTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/DtdTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/DtdTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,24 +1,23 @@ package uk.ac.cam.ch.wwmm.opsin; -import java.io.IOException; +import static org.junit.Assert.assertTrue; + import java.net.URI; import java.net.URISyntaxException; import java.net.URL; -import java.util.ArrayList; -import java.util.List; +import java.util.HashSet; +import java.util.Set; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; - -import nu.xom.Document; -import nu.xom.Element; -import nu.xom.Elements; +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; import org.junit.Test; import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; -import static junit.framework.Assert.*; public class DtdTest { private final static String RESOURCE_LOCATION = "uk/ac/cam/ch/wwmm/opsin/resources/"; @@ -26,11 +25,14 @@ @Test public void testTokenFiles() throws Exception { - Document tokenFileDoc = resourceGetter.getXMLDocument("index.xml"); - Elements tokenFiles = tokenFileDoc.getRootElement().getChildElements(); - for (int i = 0; i < tokenFiles.size(); i++) { - validate(getUriForFile(tokenFiles.get(i).getValue())); + XMLStreamReader reader = resourceGetter.getXMLStreamReader("index.xml"); + while (reader.hasNext()) { + if (reader.next() == XMLStreamConstants.START_ELEMENT && + reader.getLocalName().equals("tokenFile")) { + validate(getUriForFile(reader.getElementText())); + } } + 
reader.close(); } @Test @@ -59,35 +61,61 @@ } @Test - public void testTokenFilesValueValidity() throws IOException { - Document tokenFileDoc = resourceGetter.getXMLDocument("index.xml"); - Elements tokenFiles = tokenFileDoc.getRootElement().getChildElements(); - for (int i = 0; i < tokenFiles.size(); i++) { - Element rootElement = resourceGetter.getXMLDocument(tokenFiles.get(i).getValue()).getRootElement(); - List tokenLists =new ArrayList(); - if (rootElement.getLocalName().equals("tokenLists")){//support for xml files with one "tokenList" or multiple "tokenList" under a "tokenLists" element - Elements children =rootElement.getChildElements(); - for (int j = 0; j terms = new HashSet(); + while (reader.hasNext()) { + switch (reader.next()) { + case XMLStreamConstants.START_ELEMENT: + if (reader.getLocalName().equals("token")) { + String tokenString = reader.getElementText(); + assertTrue(tokenString +" occurred more than once in a tokenList",!terms.contains(tokenString)); + terms.add(tokenString); char[] characters = tokenString.toCharArray(); for (char c : characters) { - assertTrue("Non ascii character found in token: " + tokenString +" , an ASCII replacement should be used!" ,(int)c < 128); + assertTrue("Non ascii character found in token: " + tokenString + OpsinTools.NEWLINE + "An ASCII replacement should be used!" ,(int)c < 128); + assertTrue("Capital letter found in token: " + tokenString + OpsinTools.NEWLINE + "Only lower case letters should be used!" 
, !(c >='A' && c <='Z')); } - assertEquals("The following token contains upper case characters!: " +tokenString,tokenString.toLowerCase(), tokenString); } + break; + case XMLStreamConstants.END_ELEMENT: + if (reader.getLocalName().equals("tokenList")) { + return; + } + break; } } } - + public static void validate(URI uri) throws Exception { System.out.println("Validating:"+ uri); DocumentBuilderFactory f = DocumentBuilderFactory.newInstance(); diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FragmentManagerTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FragmentManagerTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FragmentManagerTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FragmentManagerTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,6 +1,6 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.*; +import static org.junit.Assert.*; import java.io.IOException; import java.util.ArrayList; @@ -8,14 +8,14 @@ import org.junit.Before; import org.junit.Test; - public class FragmentManagerTest { FragmentManager fragManager; @Before public void setUp() throws IOException{ - fragManager = new FragmentManager(new SMILESFragmentBuilder(), new IDManager()); + IDManager idManager = new IDManager(); + fragManager = new FragmentManager(new SMILESFragmentBuilder(idManager), idManager); } @Test @@ -23,21 +23,21 @@ Fragment frag1 = fragManager.buildSMILES("CC"); Fragment frag2 = fragManager.buildSMILES("CNC"); - fragManager.createBond(frag1.getAtomByLocant("1"), frag2.getAtomByLocant("1"), 1); + fragManager.createBond(frag1.getFirstAtom(), frag2.getFirstAtom(), 1); Fragment frag = fragManager.getUnifiedFragment(); - assertEquals("Frag has five atoms", 5, frag.getAtomList().size()); + assertEquals("Frag has five atoms", 5, frag.getAtomCount()); assertEquals("Frag has four bonds", 4, frag.getBondSet().size()); } @Test public 
void testRelabelFusedRingSystem() throws StructureBuildingException { Fragment naphthalene = fragManager.buildSMILES("C1=CC=CC2=CC=CC=C12"); - FragmentTools.relabelFusedRingSystem(naphthalene); + FragmentTools.relabelLocantsAsFusedRingSystem(naphthalene.getAtomList()); assertEquals("Locant 1 = atom 1", 1, naphthalene.getIDFromLocant("1")); assertEquals("Locant 4a = atom 5", 5, naphthalene.getIDFromLocant("4a")); assertEquals("Locant 8 = atom 9", 9, naphthalene.getIDFromLocant("8")); assertEquals("Locant 8a = atom 10", 10, naphthalene.getIDFromLocant("8a")); - assertEquals("No locant 9", 0, naphthalene.getIDFromLocant("")); + assertEquals("No locant 9", 0, naphthalene.getIDFromLocant("9")); } @Test @@ -50,7 +50,7 @@ assertNull(urea.getAtomByLocant("N'''")); Fragment primedCopy = fragManager.copyAndRelabelFragment(urea, 1); - assertEquals(4, primedCopy.getAtomList().size()); + assertEquals(4, primedCopy.getAtomCount()); assertNull(primedCopy.getAtomByLocant("N")); assertNull(primedCopy.getAtomByLocant("N'")); assertNotNull(primedCopy.getAtomByLocant("N''")); diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FragmentTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FragmentTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FragmentTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FragmentTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -6,17 +6,23 @@ import org.junit.Before; import org.junit.Test; -import static junit.framework.Assert.*; +import static org.junit.Assert.*; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; public class FragmentTest { private Fragment frag; - private FragmentManager fm = new FragmentManager(new SMILESFragmentBuilder(), new IDManager()); + private FragmentManager fm; @Before public void setUp(){ - frag = new Fragment(); + IDManager idManager = new IDManager(); + fm = new FragmentManager(new 
SMILESFragmentBuilder(idManager), idManager); + try { + frag = fm.buildSMILES(""); + } catch (StructureBuildingException e) { + throw new RuntimeException(e); + } } @Test @@ -24,37 +30,17 @@ assertNotNull("Has atom list", frag.getAtomList()); } - //FIXME Argh! I hate namespaces! - /*public void testtoCMLMolecule() { - Element elem = frag.toCMLMolecule(); - assertNotNull("Got an Element", elem); - assertEquals("Element is a cml tag", elem.getLocalName(), "cml"); - frag.addAtom(new Atom(1, 1, "C", frag)); - elem = frag.toCMLMolecule(); - assertEquals("foo", elem.getFirstChildElement("molecule").toXML(), "foo bar"); - assertEquals("Element has 1 atom greatgrandchild", 1, elem.getFirstChildElement("molecule") - .getFirstChildElement("atomArray").getChildElements("atom").size()); - frag.addAtom(new Atom(2, 2, "C", frag)); - elem = frag.toCMLMolecule(); - assertEquals("Element has 2 atom greatgrandchildren", 2, elem.getFirstChildElement("molecule") - .getFirstChildElement("atomArray").getChildElements("atom").size()); - frag.addBond(new Bond(1, 2, 1)); - elem = frag.toCMLMolecule(); - assertEquals("Element has 1 bond greatgrandchildren", 1, elem.getFirstChildElement("molecule") - .getFirstChildElement("bondArray").getChildElements("bond").size()); - }*/ - @Test public void testAddAtom() { - assertEquals("Has no atoms", 0, frag.getAtomList().size()); - frag.addAtom(new Atom(1, "C", frag)); - assertEquals("Now has one atom", 1, frag.getAtomList().size()); + assertEquals("Has no atoms", 0, frag.getAtomCount()); + frag.addAtom(new Atom(1, ChemEl.C, frag)); + assertEquals("Now has one atom", 1, frag.getAtomCount()); } @Test public void testAddBond() { - frag.addAtom(new Atom(1, "C", frag)); - frag.addAtom(new Atom(2, "C", frag)); + frag.addAtom(new Atom(1, ChemEl.C, frag)); + frag.addAtom(new Atom(2, ChemEl.C, frag)); assertEquals("Has no bonds", 0, frag.getBondSet().size()); fm.createBond(frag.getAtomByID(1), frag.getAtomByID(2), 1); assertEquals("Now has one bond", 1, 
frag.getBondSet().size()); @@ -64,19 +50,54 @@ public void testImportFrag() throws StructureBuildingException { Fragment frag1 = fm.buildSMILES("CC"); Fragment frag2 = fm.buildSMILES("CC"); - assertEquals("Fragment has two atoms", 2, frag1.getAtomList().size()); + assertEquals("Fragment has two atoms", 2, frag1.getAtomCount()); assertEquals("Fragment has one bond", 1, frag1.getBondSet().size()); fm.incorporateFragment(frag2, frag1); - assertEquals("Fragment now has four atoms", 4, frag1.getAtomList().size()); + assertEquals("Fragment now has four atoms", 4, frag1.getAtomCount()); assertEquals("Fragment now has two bonds", 2, frag1.getBondSet().size()); } + + @Test + public void testImportFragWithIntraFragBonds1() throws StructureBuildingException { + Fragment frag1 = fm.buildSMILES("C"); + Fragment frag2 = fm.buildSMILES("C"); + fm.createBond(frag1.getFirstAtom(), frag2.getFirstAtom(), 1); + assertEquals(0, frag1.getBondSet().size()); + assertEquals(0, frag2.getBondSet().size()); + assertEquals(1, fm.getInterFragmentBonds(frag1).size()); + assertEquals(1, fm.getInterFragmentBonds(frag2).size()); + fm.incorporateFragment(frag2, frag1); + assertEquals(1, frag1.getBondSet().size()); + assertEquals(0, frag2.getBondSet().size()); + assertEquals(0, fm.getInterFragmentBonds(frag1).size()); + } + + @Test + public void testImportFragWithIntraFragBonds2() throws StructureBuildingException { + Fragment frag1 = fm.buildSMILES("C"); + Fragment frag2 = fm.buildSMILES("C"); + Fragment frag3 = fm.buildSMILES("C"); + fm.createBond(frag2.getFirstAtom(), frag3.getFirstAtom(), 1); + assertEquals(0, frag1.getBondSet().size()); + assertEquals(0, frag2.getBondSet().size()); + assertEquals(0, frag3.getBondSet().size()); + assertEquals(0, fm.getInterFragmentBonds(frag1).size()); + assertEquals(1, fm.getInterFragmentBonds(frag2).size()); + assertEquals(1, fm.getInterFragmentBonds(frag3).size()); + fm.incorporateFragment(frag2, frag1); + assertEquals(0, frag1.getBondSet().size()); + 
assertEquals(0, frag2.getBondSet().size()); + assertEquals(0, frag3.getBondSet().size()); + assertEquals(1, fm.getInterFragmentBonds(frag1).size()); + assertEquals(1, fm.getInterFragmentBonds(frag3).size()); + } @Test - public void testGetIDFromLocant() throws StructureBuildingException { - Atom atom = new Atom(10, "C", frag); + public void testGetIDFromLocant() { + Atom atom = new Atom(10, ChemEl.C, frag); atom.addLocant("a"); frag.addAtom(atom); - atom = new Atom(20, "C", frag); + atom = new Atom(20, ChemEl.C, frag); atom.addLocant("silly"); frag.addAtom(atom); assertEquals("Locant a has ID 10", 10, frag.getIDFromLocant("a")); @@ -85,11 +106,11 @@ } @Test - public void testGetAtomByLocant() throws StructureBuildingException { - Atom atom1 = new Atom(10, "C", frag); + public void testGetAtomByLocant() { + Atom atom1 = new Atom(10, ChemEl.C, frag); atom1.addLocant("a"); frag.addAtom(atom1); - Atom atom2 = new Atom(20, "C", frag); + Atom atom2 = new Atom(20, ChemEl.C, frag); atom2.addLocant("silly"); frag.addAtom(atom2); assertEquals("Locant a gets atom1", atom1, frag.getAtomByLocant("a")); @@ -98,10 +119,10 @@ } @Test - public void testGetAtomByID() throws StructureBuildingException { - Atom atom1 = new Atom(10, "C", frag); + public void testGetAtomByID() { + Atom atom1 = new Atom(10, ChemEl.C, frag); frag.addAtom(atom1); - Atom atom2 = new Atom(20, "C", frag); + Atom atom2 = new Atom(20, ChemEl.C, frag); frag.addAtom(atom2); assertEquals("ID 10 gets atom1", atom1, frag.getAtomByID(10)); assertEquals("ID 20 gets atom2", atom2, frag.getAtomByID(20)); @@ -109,11 +130,11 @@ } @Test - public void testFindBond() throws StructureBuildingException { - frag.addAtom(new Atom(1, "C", frag)); - frag.addAtom(new Atom(2, "C", frag)); - frag.addAtom(new Atom(3, "N", frag)); - frag.addAtom(new Atom(4, "O", frag)); + public void testFindBond() { + frag.addAtom(new Atom(1, ChemEl.C, frag)); + frag.addAtom(new Atom(2, ChemEl.C, frag)); + frag.addAtom(new Atom(3, ChemEl.N, frag)); + 
frag.addAtom(new Atom(4, ChemEl.O, frag)); fm.createBond(frag.getAtomByID(2), frag.getAtomByID(4), 2); fm.createBond(frag.getAtomByID(1), frag.getAtomByID(2), 1); fm.createBond(frag.getAtomByID(1), frag.getAtomByID(3), 3); @@ -128,23 +149,23 @@ } @Test - public void testGetChainLength() throws StructureBuildingException { + public void testGetChainLength() { assertEquals("No chain", 0, frag.getChainLength()); - Atom a1 =new Atom(1, "C", frag); + Atom a1 =new Atom(1, ChemEl.C, frag); a1.addLocant("1"); frag.addAtom(a1); assertEquals("Methane", 1, frag.getChainLength()); - Atom a2 =new Atom(2, "C", frag); + Atom a2 =new Atom(2, ChemEl.C, frag); a2.addLocant("2"); frag.addAtom(a2); fm.createBond(frag.getAtomByID(1), frag.getAtomByID(2), 1); assertEquals("ethane", 2, frag.getChainLength()); - Atom a3 =new Atom(3, "C", frag); + Atom a3 =new Atom(3, ChemEl.C, frag); a3.addLocant("3"); frag.addAtom(a3); fm.createBond(frag.getAtomByID(2), frag.getAtomByID(3), 1); assertEquals("propane", 3, frag.getChainLength()); - Atom a4 =new Atom(4, "C", frag); + Atom a4 =new Atom(4, ChemEl.C, frag); frag.addAtom(a4); a4.addLocant("4"); fm.createBond(frag.getAtomByID(2), frag.getAtomByID(4), 1); @@ -363,16 +384,14 @@ @Test public void testIndicatedHydrogen() throws StructureBuildingException { - SMILESFragmentBuilder sBuilder = new SMILESFragmentBuilder(); - Fragment pyrrole = sBuilder.build("[nH]1cccc1", fm); + Fragment pyrrole = fm.buildSMILES("[nH]1cccc1"); assertEquals("Pyrrole has 1 indicated hydrogen", 1, pyrrole.getIndicatedHydrogen().size()); assertEquals("..and the indicated hydrogen is on the nitrogen", pyrrole.getFirstAtom(), pyrrole.getIndicatedHydrogen().get(0)); } @Test public void testSpareValenciesOnAromaticAtoms() throws StructureBuildingException{ - SMILESFragmentBuilder sBuilder = new SMILESFragmentBuilder(); - Fragment naphthalene = sBuilder.build("c1cccc2ccccc12", fm); + Fragment naphthalene = fm.buildSMILES("c1cccc2ccccc12"); for(Atom a : naphthalene.getAtomList()) 
{ assertEquals("All atoms have sv", true, a.hasSpareValency()); } @@ -383,23 +402,22 @@ @Test public void testConvertSpareValenciesToDoubleBonds() throws StructureBuildingException{ - SMILESFragmentBuilder sBuilder = new SMILESFragmentBuilder(); - Fragment dhp = sBuilder.build("c1cCccC1", fm); + Fragment dhp = fm.buildSMILES("c1cCccC1"); FragmentTools.convertSpareValenciesToDoubleBonds(dhp); for(Atom a : dhp.getAtomList()) { assertEquals("All atoms have no sv", false, a.hasSpareValency()); } - Fragment funnydiene = sBuilder.build("C(=C)C=C", fm); + Fragment funnydiene = fm.buildSMILES("C(=C)C=C"); FragmentTools.convertSpareValenciesToDoubleBonds(funnydiene); for(Atom a : funnydiene.getAtomList()) { assertEquals("All atoms have no sv", false, a.hasSpareValency()); } - Fragment naphthalene = sBuilder.build("c1cccc2ccccc12", fm); + Fragment naphthalene = fm.buildSMILES("c1cccc2ccccc12"); FragmentTools.convertSpareValenciesToDoubleBonds(naphthalene); for(Atom a : naphthalene.getAtomList()) { assertEquals("All atoms have no sv", false, a.hasSpareValency()); } - Fragment pentalene = sBuilder.build("c12c(ccc1)ccc2", fm); + Fragment pentalene = fm.buildSMILES("c12c(ccc1)ccc2"); for(Atom a : pentalene.getAtomList()) { assertEquals("All atoms have sv", true, a.hasSpareValency()); } @@ -412,12 +430,54 @@ @Test public void testGetAtomNeighbours() throws StructureBuildingException{ - SMILESFragmentBuilder sBuilder = new SMILESFragmentBuilder(); - Fragment naphthalene = sBuilder.build("C1=CC=CC2=CC=CC=C12", fm); + Fragment naphthalene = fm.buildSMILES("C1=CC=CC2=CC=CC=C12"); assertEquals("Atom 1 has two neighbours", 2, naphthalene.getIntraFragmentAtomNeighbours(naphthalene.getAtomByID(1)).size()); assertEquals("Atom 5 has three neighbours", 3, naphthalene.getIntraFragmentAtomNeighbours(naphthalene.getAtomByID(5)).size()); } + + @Test + public void testIsCharacteristicAtomSuffix() throws StructureBuildingException{ + Fragment parent = fm.buildSMILES("CC"); + Fragment suffix = 
fm.buildSMILES("N", SUFFIX_TYPE_VAL, NONE_LABELS_VAL); + fm.incorporateFragment(suffix, suffix.getFirstAtom(), parent, parent.getFirstAtom(), 1); + List parentAtoms = parent.getAtomList(); + assertFalse(FragmentTools.isCharacteristicAtom(parentAtoms.get(0))); + assertFalse(FragmentTools.isCharacteristicAtom(parentAtoms.get(1))); + assertTrue(FragmentTools.isCharacteristicAtom(parentAtoms.get(2))); + } + + @Test + public void testIsCharacteristicAtomAldehyde() throws StructureBuildingException{ + Fragment parent = fm.buildSMILES("CC"); + Fragment suffix = fm.buildSMILES("O", SUFFIX_TYPE_VAL, NONE_LABELS_VAL); + fm.incorporateFragment(suffix, suffix.getFirstAtom(), parent, parent.getFirstAtom(), 2); + List parentAtoms = parent.getAtomList(); + parentAtoms.get(1).setProperty(Atom.ISALDEHYDE, true); + assertFalse(FragmentTools.isCharacteristicAtom(parentAtoms.get(0))); + assertTrue(FragmentTools.isCharacteristicAtom(parentAtoms.get(1))); + assertTrue(FragmentTools.isCharacteristicAtom(parentAtoms.get(2))); + } + + @Test + public void testIsCharacteristicAtomFunctionalAtom() throws StructureBuildingException{ + Fragment parent = fm.buildSMILES("CC(=O)[O-]"); + List parentAtoms = parent.getAtomList(); + parent.addFunctionalAtom(parentAtoms.get(3)); + for (int i = 0; i < parentAtoms.size() - 1; i++) { + assertFalse(FragmentTools.isCharacteristicAtom(parentAtoms.get(i))); + } + assertTrue(FragmentTools.isCharacteristicAtom(parentAtoms.get(parentAtoms.size() - 1))); + } + + @Test + public void testIsCharacteristicAtomHydroxy() throws StructureBuildingException{ + List phenolAtoms = fm.buildSMILES("Oc1ccccc1").getAtomList(); + assertTrue(FragmentTools.isCharacteristicAtom(phenolAtoms.get(0))); + for (int i = 1; i < phenolAtoms.size(); i++) { + assertFalse(FragmentTools.isCharacteristicAtom(phenolAtoms.get(i))); + } + } } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumbererFunctionsTest.java 
opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumbererFunctionsTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumbererFunctionsTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumbererFunctionsTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,7 +1,8 @@ package uk.ac.cam.ch.wwmm.opsin; -import org.junit.Test; -import static junit.framework.Assert.*; +import static org.junit.Assert.*; + +import org.junit.Test; public class FusedRingNumbererFunctionsTest { diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumbererTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumbererTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumbererTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/FusedRingNumbererTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,11 +1,10 @@ package uk.ac.cam.ch.wwmm.opsin; -import java.util.List; +import static org.junit.Assert.*; +import static org.mockito.Mockito.mock; -import junit.framework.Assert; +import java.util.List; -import org.junit.AfterClass; -import org.junit.BeforeClass; import org.junit.Ignore; import org.junit.Test; @@ -17,17 +16,7 @@ */ public class FusedRingNumbererTest { - private static FragmentManager fm; - - @BeforeClass - public static void setUp(){ - fm = new FragmentManager(new SMILESFragmentBuilder(), new IDManager()); - } - - @AfterClass - public static void cleanUp(){ - fm = null; - } + private SMILESFragmentBuilder sBuilder = new SMILESFragmentBuilder(new IDManager()); @Test public void aceanthrene() throws StructureBuildingException { @@ -430,6 +419,23 @@ public void anthyridine() throws StructureBuildingException { compareNumbering("n1cccc2cc3cccnc3nc12", "1/2/3/4/4a/5/5a/6/7/8/9/9a/10/10a"); } + + @Test + public void benzo_cd_azulene() 
throws StructureBuildingException { + compareNumbering("c1cc2cccc3ccccc1c23", "1/2/2a/3/4/5/5a/6/7/8/9/9a/9b"); + } + + @Test + public void indeno_7_1_cd_azepine() throws StructureBuildingException { + compareNumbering("c1nccc2ccc3cccc1c23", "1/2/3/4/4a/5/6/6a/7/8/9/9a/9b"); + } + + @Test + @Ignore + public void tripleSubstituedSevenMembered() throws StructureBuildingException { + compareNumbering("C1NCCN2c3ncccc3Cc4ccccc4C12", "1/2/3/4/5/5a/6/7/8/9/9a/10/10a/11/12/13/14/14a/14b"); + compareNumbering("c1cccc2C3CNCCN3c4ncccc4Cc12", "1/2/3/4/5/5a/6/7/8/9/9a/10/10a/11/12/13/14/14a/14b"); + } /** * Takes smiles and expected labels for a fused ring. Generates the fused ring, numbers it then compares to the given slash delimited labels @@ -438,13 +444,14 @@ * @throws StructureBuildingException */ private void compareNumbering(String smiles, String labels) throws StructureBuildingException { - Fragment fusedRing =fm.buildSMILES(smiles, "", "none"); + Fragment fusedRing = sBuilder.build(smiles, mock(Element.class), XmlDeclarations.NONE_LABELS_VAL); String[] labelArray =labels.split("/", -1); FusedRingNumberer.numberFusedRing(fusedRing); List atomList =fusedRing.getAtomList(); + assertEquals(atomList.size(), labelArray.length);//bug in test if not true! 
for (int i = 0; i < atomList.size(); i++) { if (!labelArray[i].equals("")){//exterior atom locant - Assert.assertEquals(labelArray[i],atomList.get(i).getFirstLocant()); + assertEquals(labelArray[i],atomList.get(i).getFirstLocant()); } } } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/HeteroAtomReplacementTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/HeteroAtomReplacementTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/HeteroAtomReplacementTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/HeteroAtomReplacementTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,6 +1,8 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.*; + +import static org.junit.Assert.*; import static org.mockito.Mockito.mock; + import org.junit.Before; import org.junit.Test; @@ -11,8 +13,9 @@ @Before public void setUp() { - fragManager = new FragmentManager(new SMILESFragmentBuilder(), new IDManager()); - a = new Atom(0, "C", mock(Fragment.class)); + IDManager idManager = new IDManager(); + fragManager = new FragmentManager(new SMILESFragmentBuilder(idManager), idManager); + a = new Atom(0, ChemEl.C, mock(Fragment.class)); } @Test @@ -57,7 +60,7 @@ @Test public void replaceNeutralWithCharged() throws StructureBuildingException{ - Atom a = new Atom(0, "C", mock(Fragment.class)); + Atom a = new Atom(0, ChemEl.C, mock(Fragment.class)); fragManager.replaceAtomWithSmiles(a, "[NH4+]"); assertEquals(1, a.getCharge()); assertEquals(1, a.getProtonsExplicitlyAddedOrRemoved()); @@ -66,7 +69,7 @@ @Test public void replaceChargedWithEquallyCharged() throws StructureBuildingException{ - Atom a = new Atom(0, "C", mock(Fragment.class)); + Atom a = new Atom(0, ChemEl.C, mock(Fragment.class)); a.addChargeAndProtons(1, -1); fragManager.replaceAtomWithSmiles(a, "[NH4+]"); assertEquals(1, a.getCharge()); @@ -76,7 +79,7 @@ 
@Test(expected=StructureBuildingException.class) public void replaceChargedWithUnEquallyCharged() throws StructureBuildingException{ - Atom a = new Atom(0, "C", mock(Fragment.class)); + Atom a = new Atom(0, ChemEl.C, mock(Fragment.class)); a.addChargeAndProtons(1, -1); fragManager.replaceAtomWithSmiles(a, "[NH2-]"); } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/NameToStructureTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/NameToStructureTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/NameToStructureTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/NameToStructureTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,9 +1,6 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertNotNull; -import static junit.framework.Assert.assertNull; -import nu.xom.Element; +import static org.junit.Assert.*; import org.junit.Test; @@ -19,34 +16,35 @@ @Test public void testParseToCML() { NameToStructure nts = NameToStructure.getInstance(); - Element cml = nts.parseToCML("ethane"); + String cml = nts.parseToCML("ethane"); // output is syntactically valid (schema, dictRefs) // labels assigned and is correct. 
// contains a molecule with same connectivity as 'frag of CML' assertEquals("Parsing 'ethane'", "" + + "xmlns:nameDict=\"http://www.xml-cml.org/dictionary/cml/name/\">" + + "" + "ethane" + - "" + - "" + - "" + - "" + - "" + - "" + - "" + - "" + + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "" + "" + - "" + - "" + - "" + - "" + - "" + - "" + - "" + - "", cml.toXML()); + "" + + "" + + "" + + "" + + "" + + "" + + "" + + "", cml); assertNull("Won't parse helloworld", nts.parseToCML("helloworld")); } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ParserTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ParserTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ParserTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/ParserTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,14 +1,12 @@ package uk.ac.cam.ch.wwmm.opsin; + +import static org.junit.Assert.*; + import java.io.IOException; import java.util.List; -import nu.xom.Element; -import static junit.framework.Assert.assertEquals; -import static junit.framework.Assert.assertFalse; import org.junit.AfterClass; import org.junit.BeforeClass; - - import org.junit.Test; public class ParserTest { @@ -50,6 +48,11 @@ public void testParseThrowsWhenNameIsSubstituentOnly() throws ParsingException { parser.parse(config, "chloro"); } + + @Test(expected=ParsingException.class) + public void testNoParseForOneComponentSalt() throws ParsingException { + parser.parse(config, "pyridine salt"); + } @Test public void testConvertStringToComponentRatios1() throws ParsingException { diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/PolymerTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/PolymerTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/PolymerTest.java 1970-01-01 00:00:00.000000000 +0000 +++ 
opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/PolymerTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,22 @@ +package uk.ac.cam.ch.wwmm.opsin; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; + +import org.junit.Test; + +public class PolymerTest { + + @Test + public void testSimplePolymer() throws ParsingException { + OpsinResult result = NameToStructure.getInstance().parseChemicalName("poly(oxyethylene)"); + String smiles = result.getSmiles(); + assertNotNull(smiles); + assertEquals(true, smiles.contains("[*:1]")); + assertEquals(true, smiles.contains("[*:2]")); + + String cml = result.getCml(); + assertEquals(true, cml.contains("alpha")); + assertEquals(true, cml.contains("omega")); + } +} diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/PreProcessorTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/PreProcessorTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/PreProcessorTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/PreProcessorTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,6 +1,7 @@ package uk.ac.cam.ch.wwmm.opsin; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.*; + import org.junit.Test; public class PreProcessorTest { @@ -63,6 +64,6 @@ @Test public void testHtmlGreeks() throws PreProcessingException { assertEquals("alpha-methyl-toluene", PreProcessor.preProcess("α-methyl-toluene")); - assertEquals("BETA-methyl-styrene", PreProcessor.preProcess("&BETA;-methyl-styrene")); + assertEquals("beta-methyl-styrene", PreProcessor.preProcess("&BETA;-methyl-styrene")); } } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/RadixTrieTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/RadixTrieTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/RadixTrieTest.java 2013-07-21 
14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/RadixTrieTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,9 +1,11 @@ package uk.ac.cam.ch.wwmm.opsin; +import static org.junit.Assert.*; + import java.util.List; import org.junit.Test; -import static junit.framework.Assert.*; + public class RadixTrieTest { diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/SMILESFragmentBuilderTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/SMILESFragmentBuilderTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/SMILESFragmentBuilderTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/SMILESFragmentBuilderTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,50 +1,41 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.assertNotNull; import static org.junit.Assert.*; import java.util.List; import java.util.Set; -import junit.framework.Assert; - -import org.junit.Before; import org.junit.Test; import uk.ac.cam.ch.wwmm.opsin.BondStereo.BondStereoValue; public class SMILESFragmentBuilderTest { - private FragmentManager fm; - - @Before - public void setUp(){ - fm = new FragmentManager(new SMILESFragmentBuilder(), new IDManager()); - } + private SMILESFragmentBuilder sBuilder = new SMILESFragmentBuilder(new IDManager()); @Test public void testBuild() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C"); + Fragment fragment = sBuilder.build("C"); assertNotNull("Got a fragment", fragment); } @Test public void testSimple1() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("CC"); + Fragment fragment = sBuilder.build("CC"); List atomList = fragment.getAtomList(); assertEquals(2, atomList.size()); - assertEquals("C", atomList.get(0).getElement()); - assertEquals("C", atomList.get(1).getElement()); + assertEquals(ChemEl.C, atomList.get(0).getElement()); + 
assertEquals(ChemEl.C, atomList.get(1).getElement()); } @Test public void testSimple2() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("O=C=O"); + Fragment fragment = sBuilder.build("O=C=O"); List atomList = fragment.getAtomList(); assertEquals(3, atomList.size()); - assertEquals("O", atomList.get(0).getElement()); - assertEquals("C", atomList.get(1).getElement()); - assertEquals("O", atomList.get(2).getElement()); + assertEquals(ChemEl.O, atomList.get(0).getElement()); + assertEquals(ChemEl.C, atomList.get(1).getElement()); + assertEquals(ChemEl.O, atomList.get(2).getElement()); Set bonds = fragment.getBondSet(); assertEquals(2, bonds.size()); for (Bond bond : bonds) { @@ -54,7 +45,7 @@ @Test public void testSimple3() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C#N"); + Fragment fragment = sBuilder.build("C#N"); List atomList = fragment.getAtomList(); assertEquals(2, atomList.size()); Set bonds = fragment.getBondSet(); @@ -66,12 +57,12 @@ @Test public void testSimple4() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("CCN(CC)CC"); + Fragment fragment = sBuilder.build("CCN(CC)CC"); List atomList = fragment.getAtomList(); assertEquals(7, atomList.size()); Atom nitrogen = atomList.get(2); - assertEquals("N", nitrogen.getElement()); - assertEquals(3, nitrogen.getBonds().size()); + assertEquals(ChemEl.N, nitrogen.getElement()); + assertEquals(3, nitrogen.getBondCount()); List neighbours = nitrogen.getAtomNeighbours();//bonds and hence neighbours come from a linked hash set so the order of the neighbours is deterministic assertEquals(3, neighbours.size()); assertEquals(atomList.get(1), neighbours.get(0)); @@ -81,7 +72,7 @@ @Test public void testSimple5() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("CC(=O)O"); + Fragment fragment = sBuilder.build("CC(=O)O"); List atomList = fragment.getAtomList(); assertEquals(4, atomList.size()); Atom carbon = atomList.get(1); 
@@ -95,7 +86,7 @@ @Test public void testSimple6() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C1CCCCC1"); + Fragment fragment = sBuilder.build("C1CCCCC1"); List atomList = fragment.getAtomList(); assertEquals(6, atomList.size()); for (Atom atom : atomList) { @@ -106,7 +97,7 @@ @Test public void testSimple7() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("c1ccccc1"); + Fragment fragment = sBuilder.build("c1ccccc1"); List atomList = fragment.getAtomList(); assertEquals(6, atomList.size()); for (Atom atom : atomList) { @@ -118,7 +109,7 @@ @Test public void testSimple8() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[I-].[Na+]"); + Fragment fragment = sBuilder.build("[I-].[Na+]"); List atomList = fragment.getAtomList(); assertEquals(2, atomList.size()); Atom iodine = atomList.get(0); @@ -132,7 +123,7 @@ @Test public void testSimple9() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("(C(=O)O)"); + Fragment fragment = sBuilder.build("(C(=O)O)"); List atomList = fragment.getAtomList(); assertEquals(3, atomList.size()); Atom carbon = atomList.get(0); @@ -141,29 +132,29 @@ @Test public void testSimple10() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C-C-O"); + Fragment fragment = sBuilder.build("C-C-O"); List atomList = fragment.getAtomList(); assertEquals(3, atomList.size()); } @Test public void testSimple11() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("NC(Cl)(Br)C(=O)O"); + Fragment fragment = sBuilder.build("NC(Cl)(Br)C(=O)O"); List atomList = fragment.getAtomList(); assertEquals(7, atomList.size()); - assertEquals("Cl", atomList.get(2).getElement()); + assertEquals(ChemEl.Cl, atomList.get(2).getElement()); } @Test(expected=StructureBuildingException.class) public void unterminatedRingOpening() throws StructureBuildingException { - fm.buildSMILES("C1CC"); - Assert.fail("Should throw exception for bad 
smiles"); + sBuilder.build("C1CC"); + fail("Should throw exception for bad smiles"); } @Test public void doublePositiveCharge1() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[C++]"); + Fragment fragment = sBuilder.build("[C++]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(2, atomList.get(0).getCharge()); @@ -171,7 +162,7 @@ @Test public void doublePositiveCharge2() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[C+2]"); + Fragment fragment = sBuilder.build("[C+2]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(2, atomList.get(0).getCharge()); @@ -179,7 +170,7 @@ @Test public void doubleNegativeCharge1() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[O--]"); + Fragment fragment = sBuilder.build("[O--]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(-2, atomList.get(0).getCharge()); @@ -187,23 +178,23 @@ @Test public void doubleNegativeCharge2() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[O-2]"); + Fragment fragment = sBuilder.build("[O-2]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(-2, atomList.get(0).getCharge()); } - + @Test public void noIsotopeSpecified() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[NH3]"); + Fragment fragment = sBuilder.build("[NH3]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(null, atomList.get(0).getIsotope()); } - + @Test public void isotopeSpecified() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[15NH3]"); + Fragment fragment = sBuilder.build("[15NH3]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertNotNull("Isotope should not be null", atomList.get(0).getIsotope()); @@ -213,56 +204,56 @@ 
@Test(expected=StructureBuildingException.class) public void badlyFormedSMILE1() throws StructureBuildingException { - fm.buildSMILES("H5"); - Assert.fail("Should throw exception for bad smiles"); + sBuilder.build("H5"); + fail("Should throw exception for bad smiles"); } @Test(expected=StructureBuildingException.class) public void badlyFormedSMILE2() throws StructureBuildingException { - fm.buildSMILES("CH4"); - Assert.fail("Should throw exception for bad smiles"); + sBuilder.build("CH4"); + fail("Should throw exception for bad smiles"); } @Test(expected=StructureBuildingException.class) public void badlyFormedSMILE3() throws StructureBuildingException { - fm.buildSMILES("13C"); - Assert.fail("Should throw exception for bad smiles"); + sBuilder.build("13C"); + fail("Should throw exception for bad smiles"); } - @Test(expected=StructureBuildingException.class) - public void badlyFormedSMILE4() throws StructureBuildingException { - fm.buildSMILES("C=#C"); - Assert.fail("Should throw exception for bad smiles: is it a double or triple bond?"); - } - - @Test(expected=StructureBuildingException.class) - public void badlyFormedSMILE5() throws StructureBuildingException { - fm.buildSMILES("C#=C"); - Assert.fail("Should throw exception for bad smiles: is it a double or triple bond?"); - } - - @Test(expected=StructureBuildingException.class) - public void badlyFormedSMILE6() throws StructureBuildingException { - fm.buildSMILES("F//C=C/F"); - Assert.fail("Should throw exception for bad smiles: bond configuration specified twice"); - } + @Test(expected=StructureBuildingException.class) + public void badlyFormedSMILE4() throws StructureBuildingException { + sBuilder.build("C=#C"); + fail("Should throw exception for bad smiles: is it a double or triple bond?"); + } - - @Test(expected=StructureBuildingException.class) - public void badlyFormedSMILE7() throws StructureBuildingException { - fm.buildSMILES("F/C=C/\\F"); - Assert.fail("Should throw exception for bad smiles: bond 
configuration specified twice"); - } - - @Test(expected=StructureBuildingException.class) - public void badlyFormedSMILE8() throws StructureBuildingException { - fm.buildSMILES("F[C@@](Cl)Br"); - Assert.fail("Should throw exception for invalid atom parity, not enough atoms in atom parity"); - } + @Test(expected=StructureBuildingException.class) + public void badlyFormedSMILE5() throws StructureBuildingException { + sBuilder.build("C#=C"); + fail("Should throw exception for bad smiles: is it a double or triple bond?"); + } + + @Test(expected=StructureBuildingException.class) + public void badlyFormedSMILE6() throws StructureBuildingException { + sBuilder.build("F//C=C/F"); + fail("Should throw exception for bad smiles: bond configuration specified twice"); + } + + + @Test(expected=StructureBuildingException.class) + public void badlyFormedSMILE7() throws StructureBuildingException { + sBuilder.build("F/C=C/\\F"); + fail("Should throw exception for bad smiles: bond configuration specified twice"); + } + + @Test(expected=StructureBuildingException.class) + public void badlyFormedSMILE8() throws StructureBuildingException { + sBuilder.build("F[C@@](Cl)Br"); + fail("Should throw exception for invalid atom parity, not enough atoms in atom parity"); + } @Test public void ringClosureHandling1() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C=1CN1"); + Fragment fragment = sBuilder.build("C=1CN1"); List atomList = fragment.getAtomList(); assertEquals(3, atomList.size()); assertEquals(2, atomList.get(0).getBondToAtomOrThrow(atomList.get(2)).getOrder()); @@ -270,7 +261,7 @@ @Test public void ringClosureHandling2() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C1CN=1"); + Fragment fragment = sBuilder.build("C1CN=1"); List atomList = fragment.getAtomList(); assertEquals(3, atomList.size()); assertEquals(2, atomList.get(0).getBondToAtomOrThrow(atomList.get(2)).getOrder()); @@ -278,13 +269,13 @@ 
@Test(expected=StructureBuildingException.class) public void ringClosureHandling3() throws StructureBuildingException { - fm.buildSMILES("C#1CN=1"); - Assert.fail("Should throw exception for bad smiles"); + sBuilder.build("C#1CN=1"); + fail("Should throw exception for bad smiles"); } @Test public void ringClosureHandling4() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C=1CN=1"); + Fragment fragment = sBuilder.build("C=1CN=1"); List atomList = fragment.getAtomList(); assertEquals(3, atomList.size()); assertEquals(2, atomList.get(0).getBondToAtomOrThrow(atomList.get(2)).getOrder()); @@ -292,25 +283,25 @@ @Test public void ringSupportGreaterThan10() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C%10CC%10"); + Fragment fragment = sBuilder.build("C%10CC%10"); List atomList = fragment.getAtomList(); assertEquals(3, atomList.size()); assertEquals(2, atomList.get(0).getAtomNeighbours().size()); } - + @Test public void hydrogenHandling1() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[OH3+]"); + Fragment fragment = sBuilder.build("[OH3+]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(1, atomList.get(0).getCharge()); assertEquals(1, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); assertEquals(3, atomList.get(0).determineValency(true)); } - + @Test public void hydrogenHandling2() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[CH3][CH2][OH]"); + Fragment fragment = sBuilder.build("[CH3][CH2][OH]"); List atomList = fragment.getAtomList(); assertEquals(3, atomList.size()); assertEquals(4, atomList.get(0).determineValency(true)); @@ -320,19 +311,19 @@ assertEquals(2, atomList.get(2).determineValency(true)); assertEquals(0, atomList.get(2).getProtonsExplicitlyAddedOrRemoved()); } - + @Test public void hydrogenHandling3() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[SH2]"); + Fragment 
fragment = sBuilder.build("[SH2]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(2, atomList.get(0).determineValency(true)); assertEquals(0, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); } - + @Test public void hydrogenHandling4() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[SH4]"); + Fragment fragment = sBuilder.build("[SH4]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); int minimumVal =atomList.get(0).getMinimumValency(); @@ -340,10 +331,10 @@ assertEquals(4, atomList.get(0).determineValency(true)); assertEquals(0, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); } - + @Test public void hydrogenHandling5() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[SH6]"); + Fragment fragment = sBuilder.build("[SH6]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); int minimumVal =atomList.get(0).getMinimumValency(); @@ -351,61 +342,61 @@ assertEquals(6, atomList.get(0).determineValency(true)); assertEquals(0, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); } - + @Test public void hydrogenHandling6() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[SH3]"); + Fragment fragment = sBuilder.build("[SH3]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); int minimumVal =atomList.get(0).getMinimumValency(); assertEquals(3, minimumVal); assertEquals(3, atomList.get(0).determineValency(true)); } - - + + @Test public void hydrogenHandling7() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[SH3+]"); + Fragment fragment = sBuilder.build("[SH3+]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(1, atomList.get(0).getCharge()); assertEquals(1, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); assertEquals(3, atomList.get(0).determineValency(true)); } - + @Test public void hydrogenHandling8() 
throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[SH+]"); + Fragment fragment = sBuilder.build("[SH+]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(1, atomList.get(0).getCharge()); assertEquals(-1, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); assertEquals(1, atomList.get(0).determineValency(true)); } - + @Test public void hydrogenHandling9() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[SH3-]"); + Fragment fragment = sBuilder.build("[SH3-]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(-1, atomList.get(0).getCharge()); assertEquals(1, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); assertEquals(3, atomList.get(0).determineValency(true)); } - + @Test public void hydrogenHandling10() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[SH-]"); + Fragment fragment = sBuilder.build("[SH-]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(-1, atomList.get(0).getCharge()); assertEquals(-1, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); assertEquals(1, atomList.get(0).determineValency(true)); } - + @Test public void hydrogenHandling11() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[SH5+]"); + Fragment fragment = sBuilder.build("[SH5+]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); int lambdaConvent =atomList.get(0).getLambdaConventionValency(); @@ -414,91 +405,91 @@ assertEquals(1, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); assertEquals(5, atomList.get(0).determineValency(true)); } - + @Test public void hydrogenHandling12() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[Li+]"); + Fragment fragment = sBuilder.build("[Li+]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(1, atomList.get(0).getCharge()); 
assertEquals(0, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); assertEquals(0, atomList.get(0).determineValency(true)); } - + @Test public void hydrogenHandling13() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[NaH]"); + Fragment fragment = sBuilder.build("[NaH]"); List atomList = fragment.getAtomList(); assertEquals(2, atomList.size()); assertEquals(0, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); assertEquals(0, atomList.get(0).getCharge()); - + assertEquals(0, atomList.get(1).getProtonsExplicitlyAddedOrRemoved()); assertEquals(0, atomList.get(1).getCharge()); - assertEquals("H", atomList.get(1).getElement()); + assertEquals(ChemEl.H, atomList.get(1).getElement()); } - + @Test public void hydrogenHandling14() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("-[SiH3]"); + Fragment fragment = sBuilder.build("-[SiH3]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(4, atomList.get(0).determineValency(true)); assertEquals(0, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); } - + @Test public void hydrogenHandling15() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("=[SiH2]"); + Fragment fragment = sBuilder.build("=[SiH2]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(4, atomList.get(0).determineValency(true)); } - - + + @Test public void hydrogenHandling16() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("#[SiH]"); + Fragment fragment = sBuilder.build("#[SiH]"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(4, atomList.get(0).determineValency(true)); } - + @Test public void hydrogenHandling17() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[SiH3]-"); + Fragment fragment = sBuilder.build("[SiH3]-"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(4, 
atomList.get(0).determineValency(true)); } - + @Test public void hydrogenHandling18() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[SiH2]="); + Fragment fragment = sBuilder.build("[SiH2]="); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(4, atomList.get(0).determineValency(true)); } - + @Test public void hydrogenHandling19() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[SiH]#"); + Fragment fragment = sBuilder.build("[SiH]#"); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(4, atomList.get(0).determineValency(true)); } - + @Test public void hydrogenHandling20() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("=[Si]="); + Fragment fragment = sBuilder.build("=[Si]="); List atomList = fragment.getAtomList(); assertEquals(1, atomList.size()); assertEquals(4, atomList.get(0).determineValency(true)); } - + @Test public void hydrogenHandling21() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[o+]1ccccc1"); + Fragment fragment = sBuilder.build("[o+]1ccccc1"); List atomList = fragment.getAtomList(); assertEquals(6, atomList.size()); assertEquals(1, atomList.get(0).getProtonsExplicitlyAddedOrRemoved()); @@ -508,10 +499,10 @@ assertEquals(4, atomList.get(1).determineValency(true)); assertEquals(true, atomList.get(1).hasSpareValency()); } - + @Test public void indicatedHydrogen() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("Nc1[nH]c(=O)c2c(n1)nc[nH]2"); + Fragment fragment = sBuilder.build("Nc1[nH]c(=O)c2c(n1)nc[nH]2"); List atomList = fragment.getAtomList(); assertEquals(11, atomList.size()); assertEquals(2, fragment.getIndicatedHydrogen().size()); @@ -521,7 +512,7 @@ @Test public void chiralityTest1() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("N[C@@H](F)C"); + Fragment fragment = sBuilder.build("N[C@@H](F)C"); List atomList = 
fragment.getAtomList(); assertEquals(4, atomList.size()); Atom chiralAtom = atomList.get(1); @@ -537,7 +528,7 @@ @Test public void chiralityTest2() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("N[C@H](F)C"); + Fragment fragment = sBuilder.build("N[C@H](F)C"); List atomList = fragment.getAtomList(); assertEquals(4, atomList.size()); Atom chiralAtom = atomList.get(1); @@ -553,7 +544,7 @@ @Test public void chiralityTest3() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C2.N1.F3.[C@@H]231"); + Fragment fragment = sBuilder.build("C2.N1.F3.[C@@H]231"); List atomList = fragment.getAtomList(); assertEquals(4, atomList.size()); Atom chiralAtom = atomList.get(3); @@ -569,7 +560,7 @@ @Test public void chiralityTest4() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[C@@H]231.C2.N1.F3"); + Fragment fragment = sBuilder.build("[C@@H]231.C2.N1.F3"); List atomList = fragment.getAtomList(); assertEquals(4, atomList.size()); Atom chiralAtom = atomList.get(0); @@ -585,7 +576,7 @@ @Test public void chiralityTest5() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[C@@H](Cl)1[C@H](C)(F).Br1"); + Fragment fragment = sBuilder.build("[C@@H](Cl)1[C@H](C)(F).Br1"); List atomList = fragment.getAtomList(); assertEquals(6, atomList.size()); Atom chiralAtom1 = atomList.get(0); @@ -611,7 +602,7 @@ @Test public void chiralityTest6() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("I[C@@](Cl)(Br)F"); + Fragment fragment = sBuilder.build("I[C@@](Cl)(Br)F"); List atomList = fragment.getAtomList(); assertEquals(5, atomList.size()); Atom chiralAtom = atomList.get(1); @@ -627,7 +618,7 @@ @Test public void chiralityTest7() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C[S@](N)=O"); + Fragment fragment = sBuilder.build("C[S@](N)=O"); List atomList = fragment.getAtomList(); assertEquals(4, atomList.size()); Atom chiralAtom = atomList.get(1); @@ 
-640,10 +631,10 @@ assertEquals(atomList.get(3), atomRefs4[3]); assertEquals(-1, atomParity.getParity()); } - + @Test public void chiralityTest8() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("[S@](C)(N)=O"); + Fragment fragment = sBuilder.build("[S@](C)(N)=O"); List atomList = fragment.getAtomList(); assertEquals(4, atomList.size()); Atom chiralAtom = atomList.get(0); @@ -658,166 +649,205 @@ } @Test - public void testDoubleBondStereo1() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("F/C=C/F"); - Bond b =fragment.findBond(2, 3); - assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); - } - - @Test - public void testDoubleBondStereo2() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("F\\C=C/F"); - Bond b =fragment.findBond(2, 3); - assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); - } - - @Test - public void testDoubleBondStereo3() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C(/F)=C/F"); - Bond b =fragment.findBond(1, 3); - assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); - } - - @Test - public void testDoubleBondStereo4() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C(\\F)=C/F"); - Bond b =fragment.findBond(1, 3); - assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); - } - - @Test - public void testDoubleBondStereo5a() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("CC1=C/F.O\\1"); - Bond b =fragment.findBond(2, 3); - assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); - } - - @Test - public void testDoubleBondStereo5b() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("CC/1=C/F.O1"); - Bond b =fragment.findBond(2, 3); - assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); - } - - @Test - public void testDoubleBondStereo6() throws 
StructureBuildingException { - Fragment fragment = fm.buildSMILES("CC1=C/F.O/1"); - Bond b =fragment.findBond(2, 3); - assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); - } - - @Test - public void testDoubleBondMultiStereo1() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("F/C=C/C=C/C"); - Bond b =fragment.findBond(2, 3); - assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); - b =fragment.findBond(4, 5); - assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); - } - - @Test - public void testDoubleBondMultiStereo2() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("F/C=C\\C=C/C"); - Bond b =fragment.findBond(2, 3); - assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); - b =fragment.findBond(4, 5); - assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); - } - - @Test - public void testDoubleBondMultiStereo3() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("F/C=C\\C=C\\C"); - Bond b =fragment.findBond(2, 3); - assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); - b =fragment.findBond(4, 5); - assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); - } - - @Test - public void testDoubleBondMultiStereo4() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("F/C=C\\C=CC"); - Bond b =fragment.findBond(2, 3); - assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); - b =fragment.findBond(4, 5); - assertEquals(null, b.getBondStereo()); - } - - //From http://baoilleach.blogspot.com/2010/09/are-you-on-my-side-or-not-its-ez-part.html - @Test - public void testDoubleBondNoela() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C/C=C\\1/NC1"); - Bond b =fragment.findBond(2, 3); - if (BondStereoValue.TRANS.equals( b.getBondStereo().getBondStereoValue())){ - assertEquals("a1 a2 a3 
a4", b.getBondStereo().toCML().getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR)); - } - else{ - assertEquals("a1 a2 a3 a5", b.getBondStereo().toCML().getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR)); - } - } - - @Test - public void testDoubleBondNoelb() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C/C=C1/NC1"); - Bond b =fragment.findBond(2, 3); - assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); - assertEquals("a1 a2 a3 a4", b.getBondStereo().toCML().getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR)); - } - - @Test - public void testDoubleBondNoelc() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C/C=C\\1/NC/1"); - Bond b =fragment.findBond(2, 3); - if (BondStereoValue.TRANS.equals( b.getBondStereo().getBondStereoValue())){ - assertEquals("a1 a2 a3 a4", b.getBondStereo().toCML().getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR)); - } - else{ - assertEquals("a1 a2 a3 a5", b.getBondStereo().toCML().getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR)); - } - } - - @Test - public void testDoubleBondNoeld() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C/C=C1/NC/1"); - Bond b =fragment.findBond(2, 3); - if (BondStereoValue.TRANS.equals( b.getBondStereo().getBondStereoValue())){ - assertEquals("a1 a2 a3 a4", b.getBondStereo().toCML().getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR)); - } - else{ - assertEquals("a1 a2 a3 a5", b.getBondStereo().toCML().getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR)); - } - } - - @Test(expected=StructureBuildingException.class) - public void testDoubleBondNoele() throws StructureBuildingException { - fm.buildSMILES("C/C=C\\1\\NC1"); - Assert.fail("Should throw exception for bad smiles: contradictory double bond configuration"); - } - - @Test(expected=StructureBuildingException.class) - public void testDoubleBondNoelf() throws StructureBuildingException { - fm.buildSMILES("C/C=C\1NC\1"); - 
Assert.fail("Should throw exception for bad smiles: contradictory double bond configuration"); - } - - @Test(expected=StructureBuildingException.class) - public void testDoubleBondNoelg() throws StructureBuildingException { - fm.buildSMILES("C/C=C\1/NC\1"); - Assert.fail("Should throw exception for bad smiles: contradictory double bond configuration"); - } - - @Test - public void testDoubleBondNoelLike1() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C\\1NC1=C/C"); - Bond b =fragment.findBond(3, 4); - assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); - assertEquals("a1 a3 a4 a5", b.getBondStereo().toCML().getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR)); - } - - @Test - public void testDoubleBondNoelLike2() throws StructureBuildingException { - Fragment fragment = fm.buildSMILES("C1NC/1=C/C"); - Bond b =fragment.findBond(3, 4); - assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); - assertEquals("a1 a3 a4 a5", b.getBondStereo().toCML().getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR)); - } + public void testDoubleBondStereo1() throws StructureBuildingException { + Fragment fragment = sBuilder.build("F/C=C/F"); + Bond b =fragment.findBond(2, 3); + assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); + } + + @Test + public void testDoubleBondStereo2() throws StructureBuildingException { + Fragment fragment = sBuilder.build("F\\C=C/F"); + Bond b =fragment.findBond(2, 3); + assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); + } + + @Test + public void testDoubleBondStereo3() throws StructureBuildingException { + Fragment fragment = sBuilder.build("C(/F)=C/F"); + Bond b =fragment.findBond(1, 3); + assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); + } + + @Test + public void testDoubleBondStereo4() throws StructureBuildingException { + Fragment fragment = sBuilder.build("C(\\F)=C/F"); + Bond b =fragment.findBond(1, 
3); + assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); + } + + @Test + public void testDoubleBondStereo5a() throws StructureBuildingException { + Fragment fragment = sBuilder.build("CC1=C/F.O\\1"); + Bond b =fragment.findBond(2, 3); + assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); + } + + @Test + public void testDoubleBondStereo5b() throws StructureBuildingException { + Fragment fragment = sBuilder.build("CC/1=C/F.O1"); + Bond b =fragment.findBond(2, 3); + assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); + } + + @Test + public void testDoubleBondStereo6() throws StructureBuildingException { + Fragment fragment = sBuilder.build("CC1=C/F.O/1"); + Bond b =fragment.findBond(2, 3); + assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); + } + + @Test + public void testDoubleBondMultiStereo1() throws StructureBuildingException { + Fragment fragment = sBuilder.build("F/C=C/C=C/C"); + Bond b =fragment.findBond(2, 3); + assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); + b =fragment.findBond(4, 5); + assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); + } + + @Test + public void testDoubleBondMultiStereo2() throws StructureBuildingException { + Fragment fragment = sBuilder.build("F/C=C\\C=C/C"); + Bond b =fragment.findBond(2, 3); + assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); + b =fragment.findBond(4, 5); + assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); + } + + @Test + public void testDoubleBondMultiStereo3() throws StructureBuildingException { + Fragment fragment = sBuilder.build("F/C=C\\C=C\\C"); + Bond b =fragment.findBond(2, 3); + assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); + b =fragment.findBond(4, 5); + assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); + } + + @Test + public void 
testDoubleBondMultiStereo4() throws StructureBuildingException { + Fragment fragment = sBuilder.build("F/C=C\\C=CC"); + Bond b =fragment.findBond(2, 3); + assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); + b =fragment.findBond(4, 5); + assertEquals(null, b.getBondStereo()); + } + + //From http://baoilleach.blogspot.com/2010/09/are-you-on-my-side-or-not-its-ez-part.html + @Test + public void testDoubleBondNoela() throws StructureBuildingException { + Fragment fragment = sBuilder.build("C/C=C\\1/NC1"); + Bond b =fragment.findBond(2, 3); + if (BondStereoValue.TRANS.equals( b.getBondStereo().getBondStereoValue())){ + assertEquals("1 2 3 4", atomRefsToIdStr(b.getBondStereo().getAtomRefs4())); + } + else{ + assertEquals("1 2 3 5", atomRefsToIdStr(b.getBondStereo().getAtomRefs4())); + } + } + + @Test + public void testDoubleBondNoelb() throws StructureBuildingException { + Fragment fragment = sBuilder.build("C/C=C1/NC1"); + Bond b =fragment.findBond(2, 3); + assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); + assertEquals("1 2 3 4", atomRefsToIdStr(b.getBondStereo().getAtomRefs4())); + } + + @Test + public void testDoubleBondNoelc() throws StructureBuildingException { + Fragment fragment = sBuilder.build("C/C=C\\1/NC/1"); + Bond b =fragment.findBond(2, 3); + if (BondStereoValue.TRANS.equals( b.getBondStereo().getBondStereoValue())){ + assertEquals("1 2 3 4", atomRefsToIdStr(b.getBondStereo().getAtomRefs4())); + } + else{ + assertEquals("1 2 3 5", atomRefsToIdStr(b.getBondStereo().getAtomRefs4())); + } + } + + @Test + public void testDoubleBondNoeld() throws StructureBuildingException { + Fragment fragment = sBuilder.build("C/C=C1/NC/1"); + Bond b =fragment.findBond(2, 3); + if (BondStereoValue.TRANS.equals( b.getBondStereo().getBondStereoValue())){ + assertEquals("1 2 3 4", atomRefsToIdStr(b.getBondStereo().getAtomRefs4())); + } + else{ + assertEquals("1 2 3 5", atomRefsToIdStr(b.getBondStereo().getAtomRefs4())); + } + 
} + + @Test(expected=StructureBuildingException.class) + public void testDoubleBondNoele() throws StructureBuildingException { + sBuilder.build("C/C=C\\1\\NC1"); + fail("Should throw exception for bad smiles: contradictory double bond configuration"); + } + + @Test(expected=StructureBuildingException.class) + public void testDoubleBondNoelf() throws StructureBuildingException { + sBuilder.build("C/C=C\1NC\1"); + fail("Should throw exception for bad smiles: contradictory double bond configuration"); + } + + @Test(expected=StructureBuildingException.class) + public void testDoubleBondNoelg() throws StructureBuildingException { + sBuilder.build("C/C=C\1/NC\1"); + fail("Should throw exception for bad smiles: contradictory double bond configuration"); + } + + @Test + public void testDoubleBondCornerCase1() throws StructureBuildingException { + Fragment fragment = sBuilder.build("C\\1NC1=C/C"); + Bond b =fragment.findBond(3, 4); + assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); + assertEquals("1 3 4 5", atomRefsToIdStr(b.getBondStereo().getAtomRefs4())); + } + + @Test + public void testDoubleBondCornerCase2() throws StructureBuildingException { + Fragment fragment = sBuilder.build("C1NC/1=C/C"); + Bond b =fragment.findBond(3, 4); + assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); + assertEquals("1 3 4 5", atomRefsToIdStr(b.getBondStereo().getAtomRefs4())); + } + + @Test(expected=StructureBuildingException.class) + public void testDoubleBondCornerCase3() throws StructureBuildingException { + sBuilder.build("C/1=C/CCCCCC/1"); + fail("Should throw exception for bad smiles: contradictory double bond configuration"); + } + + @Test(expected=StructureBuildingException.class) + public void testDoubleBondCornerCase4() throws StructureBuildingException { + sBuilder.build("C\\1=C/CCCCCC\\1"); + fail("Should throw exception for bad smiles: contradictory double bond configuration"); + } + + @Test + public void 
testDoubleBondCornerCase5() throws StructureBuildingException { + Fragment fragment = sBuilder.build("C\\1=C/CCCCCC/1"); + Bond b = fragment.findBond(1, 2); + assertEquals(BondStereoValue.TRANS, b.getBondStereo().getBondStereoValue()); + assertEquals("8 1 2 3", atomRefsToIdStr(b.getBondStereo().getAtomRefs4())); + } + + @Test + public void testDoubleBondCornerCase6() throws StructureBuildingException { + Fragment fragment = sBuilder.build("C/1=C/CCCCCC\\1"); + Bond b = fragment.findBond(1, 2); + assertEquals(BondStereoValue.CIS, b.getBondStereo().getBondStereoValue()); + assertEquals("8 1 2 3", atomRefsToIdStr(b.getBondStereo().getAtomRefs4())); + } + + private String atomRefsToIdStr(Atom[] atomRefs4) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < atomRefs4.length; i++) { + sb.append(atomRefs4[i].getID()); + if (i + 1 < atomRefs4.length) { + sb.append(' '); + } + } + return sb.toString(); + } } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/SMILESWriterTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/SMILESWriterTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/SMILESWriterTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/SMILESWriterTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,7 +1,6 @@ package uk.ac.cam.ch.wwmm.opsin; -import static org.junit.Assert.assertEquals; -import static org.mockito.Mockito.mock; -import static junit.framework.Assert.*; + +import static org.junit.Assert.*; import java.util.Collections; import java.util.List; @@ -13,334 +12,336 @@ public class SMILESWriterTest { - BuildState state; + private FragmentManager fm; + @Before public void setup(){ - state = new BuildState(mock(NameToStructureConfig.class), new SMILESFragmentBuilder()); + IDManager idManager = new IDManager(); + fm = new FragmentManager(new SMILESFragmentBuilder(idManager), idManager); } @Test public void testRoundTrip1() 
throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("C"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("C"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("C", smiles); } @Test public void testRoundTrip2() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("C#N"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("C#N"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("C#N", smiles); } @Test public void testRoundTrip3() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES(StringTools.multiplyString("C",200)); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES(StringTools.multiplyString("C",200)); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals(StringTools.multiplyString("C",200), smiles); } @Test public void testRoundTrip4() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("O=C=O"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("O=C=O"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("O=C=O", smiles); } @Test public void testRoundTrip5() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("CCN(CC)CC"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("CCN(CC)CC"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("CCN(CC)CC", smiles); } @Test public void testRoundTrip6() 
throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("CC(=O)O"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("CC(=O)O"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("CC(=O)O", smiles); } @Test public void testRoundTrip7() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("C1CCCCC1"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("C1CCCCC1"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("C1CCCCC1", smiles); } @Test public void testRoundTrip8() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("C1=CC=CC=C1"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("C1=CC=CC=C1"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("C1=CC=CC=C1", smiles); } @Test public void testRoundTrip9() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("NC(Cl)(Br)C(=O)O"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("NC(Cl)(Br)C(=O)O"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("NC(Cl)(Br)C(=O)O", smiles); } @Test public void testRoundTrip10() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[NH4+].[Cl-].F.[He-2]"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[NH4+].[Cl-].F.[He-2]"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[NH4+].[Cl-].F.[He-2]", smiles); } 
@Test public void testRoundTrip11() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[NH4+].[Cl-].F.[He-2]"); + Fragment f = fm.buildSMILES("[NH4+].[Cl-].F.[He-2]"); List atomList = f.getAtomList(); Collections.reverse(atomList); f.reorderAtomCollection(atomList); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[He-2].F.[Cl-].[NH4+]", smiles); } @Test public void testRoundTrip12() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("CCO.N=O.C#N"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("CCO.N=O.C#N"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("CCO.N=O.C#N", smiles); } @Test public void testOrganic1() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[S]"); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[S]"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[S]", smiles); } @Test public void testOrganic2() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[S][H]"); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[S][H]"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[SH]", smiles); } @Test public void testOrganic3() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[S]([H])[H]"); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[S]([H])[H]"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("S", smiles); } @Test public void testOrganic4() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[S]([H])([H])[H]"); - String smiles = 
new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[S]([H])([H])[H]"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[SH3]", smiles); } @Test public void testOrganic5() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[S]([H])([H])([H])[H]"); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[S]([H])([H])([H])[H]"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[SH4]", smiles); } @Test public void testOrganic6() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("S(F)(F)(F)F"); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("S(F)(F)(F)F"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("S(F)(F)(F)F", smiles); } @Test public void testOrganic7() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("S([H])(F)(F)(F)(F)F"); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("S([H])(F)(F)(F)(F)F"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("S(F)(F)(F)(F)F", smiles); } @Test public void testOrganic8() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("S([H])([H])(F)(F)(F)F"); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("S([H])([H])(F)(F)(F)F"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[SH2](F)(F)(F)F", smiles); } @Test public void testOrganic9() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("S(F)(F)(F)(F)(F)(F)F"); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("S(F)(F)(F)(F)(F)(F)F"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("S(F)(F)(F)(F)(F)(F)F", smiles); } @Test public void testOrganic10() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[I]([H])([H])[H]"); - 
String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[I]([H])([H])[H]"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[IH3]", smiles); } @Test public void testCharged1() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[CH3+]"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[CH3+]"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[CH3+]", smiles); } @Test public void testCharged2() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[Mg+2]"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[Mg+2]"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[Mg+2]", smiles); } @Test public void testCharged3() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[BH4-]"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[BH4-]"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[BH4-]", smiles); } @Test public void testCharged4() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[O-2]"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[O-2]"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[O-2]", smiles); } @Test public void testIsotope() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[15NH3]"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[15NH3]"); + 
fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[15NH3]", smiles); } @Test public void testRGroup1() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[R]CC[R]"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[R]CC[R]"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("*CC*", smiles); } @Test public void testRGroup2() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[H][R]"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[H][R]"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[H]*", smiles); } @Test public void testRingOpeningsGreaterThan10() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("C12=C3C4=C5C6=C1C7=C8C9=C1C%10=C%11C(=C29)C3=C2C3=C4C4=C5C5=C9C6=C7C6=C7C8=C1C1=C8C%10=C%10C%11=C2C2=C3C3=C4C4=C5C5=C%11C%12=C(C6=C95)C7=C1C1=C%12C5=C%11C4=C3C3=C5C(=C81)C%10=C23"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("C12=C3C4=C5C6=C1C7=C8C9=C1C%10=C%11C(=C29)C3=C2C3=C4C4=C5C5=C9C6=C7C6=C7C8=C1C1=C8C%10=C%10C%11=C2C2=C3C3=C4C4=C5C5=C%11C%12=C(C6=C95)C7=C1C1=C%12C5=C%11C4=C3C3=C5C(=C81)C%10=C23"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("C12=C3C4=C5C6=C1C1=C7C8=C9C%10=C%11C(=C28)C3=C3C2=C4C4=C5C5=C8C6=C1C1=C6C7=C9C9=C7C%10=C%10C%11=C3C3=C2C2=C4C4=C5C5=C%11C%12=C(C1=C85)C6=C9C9=C%12C%12=C%11C4=C2C2=C%12C(=C79)C%10=C32", smiles); } @Test public void testHydrogenNotBondedToAnyNonHydrogen1() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[H-].[H+]"); - String smiles = new 
SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[H-].[H+]"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[H-].[H+]", smiles); } @Test public void testHydrogenNotBondedToAnyNonHydrogen2() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[H][H]"); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[H][H]"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[H][H]", smiles); } @Test public void testHydrogenNotBondedToAnyNonHydrogen3() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[2H][H]"); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[2H][H]"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[2H][H]", smiles); } @Test public void testHydrogenNotBondedToAnyNonHydrogen4() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[H]B1[H]B([H])[H]1"); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[H]B1[H]B([H])[H]1"); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("B1[H]B[H]1", smiles); } @Test public void testTetrahedralChirality1() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("N[C@@H](F)C"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("N[C@@H](F)C"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("N[C@@H](F)C", smiles); } @Test public void testTetrahedralChirality2() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("N[C@H](F)C"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("N[C@H](F)C"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); 
assertEquals("N[C@H](F)C", smiles); } @Test public void testTetrahedralChirality3() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("C2.N1.F3.[C@@H]231"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("C2.N1.F3.[C@@H]231"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("C[C@H](F)N", smiles); } @Test public void testTetrahedralChirality4() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[C@@H]231.C2.N1.F3"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[C@@H]231.C2.N1.F3"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[C@H](C)(N)F", smiles); } @Test public void testTetrahedralChirality5() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("[C@@H](Cl)1[C@H](C)(F).Br1"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[C@@H](Cl)1[C@H](C)(F).Br1"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("[C@H](Cl)([C@H](C)F)Br", smiles); } @Test public void testTetrahedralChirality6() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("I[C@@](Cl)(Br)F"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("I[C@@](Cl)(Br)F"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("I[C@@](Cl)(Br)F", smiles); } @Test public void testTetrahedralChirality7() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("C[S@](N)=O"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new 
SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("C[S@](N)=O"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); assertEquals("C[S@](N)=O", smiles); } @Test public void testDoubleBondSupport1() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("C/C=C/C"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("C/C=C/C"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); if (!smiles.equals("C/C=C/C") && !smiles.equals("C\\C=C\\C")){ fail(smiles +" did not correspond to one of the expected SMILES strings"); } @@ -349,9 +350,9 @@ @Test public void testDoubleBondSupport2() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("C/C=C\\C"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("C/C=C\\C"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); if (!smiles.equals("C/C=C\\C") && !smiles.equals("C\\C=C/C")){ fail(smiles +" did not correspond to one of the expected SMILES strings"); } @@ -360,9 +361,9 @@ @Test public void testDoubleBondSupport3() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("C/C=C\\C=C/C"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("C/C=C\\C=C/C"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); if (!smiles.equals("C/C=C\\C=C/C") && !smiles.equals("C\\C=C/C=C\\C")){ fail(smiles +" did not correspond to one of the expected SMILES strings"); } @@ -370,11 +371,11 @@ @Test public void testDoubleBondSupport4() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("ClC(C(=O)[O-])=CC(=CC(=O)[O-])Cl"); - StructureBuilder.makeHydrogensExplicit(state); 
+ Fragment f = fm.buildSMILES("ClC(C(=O)[O-])=CC(=CC(=O)[O-])Cl"); + fm.makeHydrogensExplicit(); f.findBond(2, 6).setBondStereoElement(new Atom[]{f.getAtomByID(1), f.getAtomByID(2), f.getAtomByID(6), f.getAtomByID(7)}, BondStereoValue.TRANS); f.findBond(7, 8).setBondStereoElement(new Atom[]{f.getAtomByID(12), f.getAtomByID(7), f.getAtomByID(8), f.getAtomByID(9)}, BondStereoValue.TRANS); - String smiles = new SMILESWriter(f).generateSmiles(); + String smiles = SMILESWriter.generateSmiles(f); if (!smiles.equals("Cl\\C(\\C(=O)[O-])=C\\C(=C/C(=O)[O-])\\Cl") && !smiles.equals("Cl/C(/C(=O)[O-])=C/C(=C\\C(=O)[O-])/Cl")){ fail(smiles +" did not correspond to one of the expected SMILES strings"); } @@ -382,9 +383,9 @@ @Test public void testDoubleBondSupport5() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("C/C=N\\O"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("C/C=N\\O"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); if (!smiles.equals("C/C=N\\O") && !smiles.equals("C\\C=N/O")){ fail(smiles +" did not correspond to one of the expected SMILES strings"); } @@ -392,9 +393,9 @@ @Test public void testDoubleBondSupport6() throws StructureBuildingException { - Fragment f = state.fragManager.buildSMILES("O=C(/C=C(C(O)=O)\\C=C/C(O)=O)O"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("O=C(/C=C(C(O)=O)\\C=C/C(O)=O)O"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); if (!smiles.equals("O=C(/C=C(/C(O)=O)\\C=C/C(O)=O)O") && !smiles.equals("O=C(\\C=C(\\C(O)=O)/C=C\\C(O)=O)O")){ fail(smiles +" did not correspond to one of the expected SMILES strings"); } @@ -402,11 +403,11 @@ @Test public void testDoubleBondSupport7() throws StructureBuildingException { - Fragment f = 
state.fragManager.buildSMILES("C(=C(C=CC(=O)O)C(=O)O)C(=O)O"); - StructureBuilder.makeHydrogensExplicit(state); + Fragment f = fm.buildSMILES("C(=C(C=CC(=O)O)C(=O)O)C(=O)O"); + fm.makeHydrogensExplicit(); f.findBond(1, 2).setBondStereoElement(new Atom[]{f.getAtomByID(11), f.getAtomByID(1), f.getAtomByID(2), f.getAtomByID(8)}, BondStereoValue.TRANS); f.findBond(3, 4).setBondStereoElement(new Atom[]{f.getAtomByID(2), f.getAtomByID(3), f.getAtomByID(4), f.getAtomByID(5)}, BondStereoValue.TRANS); - String smiles = new SMILESWriter(f).generateSmiles(); + String smiles = SMILESWriter.generateSmiles(f); if (!smiles.equals("C(=C(/C=C/C(=O)O)\\C(=O)O)/C(=O)O") && !smiles.equals("C(=C(\\C=C\\C(=O)O)/C(=O)O)\\C(=O)O")){ fail(smiles +" did not correspond to one of the expected SMILES strings"); } @@ -415,11 +416,38 @@ @Test public void testDoubleBondSupport8() throws StructureBuildingException { //hydrogen on the nitrogen must be explicit! - Fragment f = state.fragManager.buildSMILES("[H]/N=C(\\N)/O"); - StructureBuilder.makeHydrogensExplicit(state); - String smiles = new SMILESWriter(f).generateSmiles(); + Fragment f = fm.buildSMILES("[H]/N=C(\\N)/O"); + fm.makeHydrogensExplicit(); + String smiles = SMILESWriter.generateSmiles(f); if (!smiles.equals("[H]/N=C(\\N)/O") && !smiles.equals("[H]\\N=C(/N)\\O")){ fail(smiles +" did not correspond to one of the expected SMILES strings"); } } + + @Test + public void testLabelling1() throws StructureBuildingException { + Fragment f = fm.buildSMILES("CCC", "", XmlDeclarations.NONE_LABELS_VAL); + for (Atom a : f.getAtomList()) { + assertEquals(0, a.getLocants().size()); + } + + Fragment f2 = fm.buildSMILES("CCC", "", ""); + for (Atom a : f2.getAtomList()) { + assertEquals(0, a.getLocants().size()); + } + } + + @Test + public void testLabelling2() throws StructureBuildingException { + Fragment f = fm.buildSMILES("CCC", "", "1/2,alpha,2'/"); + List atoms = f.getAtomList(); + assertEquals(1, atoms.get(0).getLocants().size()); + 
assertEquals(3, atoms.get(1).getLocants().size()); + assertEquals(0, atoms.get(2).getLocants().size()); + + assertEquals("1", atoms.get(0).getLocants().get(0)); + assertEquals("2", atoms.get(1).getLocants().get(0)); + assertEquals("alpha", atoms.get(1).getLocants().get(1)); + assertEquals("2'", atoms.get(1).getLocants().get(2)); + } } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/SSSRTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/SSSRTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/SSSRTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/SSSRTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,6 +1,7 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.assertEquals; + +import static org.junit.Assert.*; import java.util.List; diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/StereochemistryTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/StereochemistryTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/StereochemistryTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/StereochemistryTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,39 +1,40 @@ package uk.ac.cam.ch.wwmm.opsin; -import java.util.ArrayList; +import static org.junit.Assert.*; +import static org.mockito.Mockito.mock; +import java.util.ArrayList; import java.util.List; -import static junit.framework.Assert.*; - -import nu.xom.Element; - import org.junit.AfterClass; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; -import static org.mockito.Mockito.mock; -import uk.ac.cam.ch.wwmm.opsin.Atom; -import uk.ac.cam.ch.wwmm.opsin.Fragment; -import uk.ac.cam.ch.wwmm.opsin.NameToStructure; + import uk.ac.cam.ch.wwmm.opsin.BondStereo.BondStereoValue; import uk.ac.cam.ch.wwmm.opsin.StereoAnalyser.StereoBond; 
import uk.ac.cam.ch.wwmm.opsin.StereoAnalyser.StereoCentre; public class StereochemistryTest { + private FragmentManager fm; + + @Before + public void setup() { + IDManager idManager = new IDManager(); + fm = new FragmentManager(new SMILESFragmentBuilder(idManager), idManager); + } + private static NameToStructure n2s; - private static SMILESFragmentBuilder sBuilder; @BeforeClass - public static void setup() { + public static void intialSetup() { n2s = NameToStructure.getInstance(); - sBuilder = new SMILESFragmentBuilder(); } @AfterClass public static void cleanUp(){ n2s = null; - sBuilder =null; } /* @@ -48,12 +49,12 @@ StereoCentre sc = stereoAnalyser.findStereoCentres().get(0); assertNotNull(sc.getStereoAtom()); Atom stereoAtom = sc.getStereoAtom(); - assertEquals("C", stereoAtom.getElement()); + assertEquals(ChemEl.C, stereoAtom.getElement()); assertEquals(4, stereoAtom.getID()); } @Test - public void findStereoCentresNacetylleucine() { + public void findStereoCentresNacetylleucine() throws CipOrderingException { Fragment f = n2s.parseChemicalName("N-acetylleucine").getStructure(); StereoAnalyser stereoAnalyser = new StereoAnalyser(f); assertEquals(1, stereoAnalyser.findStereoCentres().size()); @@ -61,21 +62,21 @@ StereoCentre sc = stereoAnalyser.findStereoCentres().get(0); assertNotNull(sc.getStereoAtom()); Atom stereoAtom = sc.getStereoAtom(); - assertEquals("C", stereoAtom.getElement()); + assertEquals(ChemEl.C, stereoAtom.getElement()); List neighbours = sc.getCipOrderedAtoms(); for (int i = 0; i < neighbours.size(); i++) { Atom a = neighbours.get(i); if (i==0){ - assertEquals(a.getElement(), "H"); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ - assertEquals(a.getElement(), "C"); + assertEquals(ChemEl.C, a.getElement()); } else if (i==2){ - assertEquals(a.getElement(), "C"); + assertEquals(ChemEl.C, a.getElement()); } else if (i==3){ - assertEquals(a.getElement(), "N"); + assertEquals(ChemEl.N, a.getElement()); } } } @@ -116,13 +117,10 @@ 
assertNotNull(atom3); Bond chiralBond = atom2.getBondToAtom(atom3); assertNotNull(chiralBond); - Element bondStereo = chiralBond.getBondStereo().toCML(); + BondStereo bondStereo = chiralBond.getBondStereo(); assertNotNull(bondStereo); - assertEquals(XmlDeclarations.CML_BONDSTEREO_EL, bondStereo.getLocalName()); - String atomRefs4 = bondStereo.getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR); - assertEquals("a1 a2 a3 a4", atomRefs4); - assertEquals("a1 a2 a3 a4", atomRefs4); - assertEquals(BondStereoValue.CIS.toString(), bondStereo.getValue()); + assertEquals("1 2 3 4", atomRefsToIdStr(bondStereo.getAtomRefs4())); + assertEquals(BondStereoValue.CIS, bondStereo.getBondStereoValue()); } @Test @@ -134,13 +132,10 @@ assertNotNull(atom3); Bond chiralBond = atom2.getBondToAtom(atom3); assertNotNull(chiralBond); - Element bondStereo = chiralBond.getBondStereo().toCML(); + BondStereo bondStereo = chiralBond.getBondStereo(); assertNotNull(bondStereo); - assertEquals(XmlDeclarations.CML_BONDSTEREO_EL, bondStereo.getLocalName()); - String atomRefs4 = bondStereo.getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR); - assertEquals("a1 a2 a3 a4", atomRefs4); - assertEquals("a1 a2 a3 a4", atomRefs4); - assertEquals(BondStereoValue.TRANS.toString(), bondStereo.getValue()); + assertEquals("1 2 3 4", atomRefsToIdStr(bondStereo.getAtomRefs4())); + assertEquals(BondStereoValue.TRANS, bondStereo.getBondStereoValue()); } @Test @@ -152,13 +147,10 @@ assertNotNull(atom3); Bond chiralBond = atom2.getBondToAtom(atom3); assertNotNull(chiralBond); - Element bondStereo = chiralBond.getBondStereo().toCML(); + BondStereo bondStereo = chiralBond.getBondStereo(); assertNotNull(bondStereo); - assertEquals(XmlDeclarations.CML_BONDSTEREO_EL, bondStereo.getLocalName()); - String atomRefs4 = bondStereo.getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR); - assertEquals("a1 a2 a3 a4", atomRefs4); - assertEquals("a1 a2 a3 a4", atomRefs4); - assertEquals(BondStereoValue.CIS.toString(), 
bondStereo.getValue()); + assertEquals("1 2 3 4", atomRefsToIdStr(bondStereo.getAtomRefs4())); + assertEquals(BondStereoValue.CIS, bondStereo.getBondStereoValue()); } @Test @@ -170,13 +162,10 @@ assertNotNull(atom3); Bond chiralBond = atom2.getBondToAtom(atom3); assertNotNull(chiralBond); - Element bondStereo = chiralBond.getBondStereo().toCML(); + BondStereo bondStereo = chiralBond.getBondStereo(); assertNotNull(bondStereo); - assertEquals(XmlDeclarations.CML_BONDSTEREO_EL, bondStereo.getLocalName()); - String atomRefs4 = bondStereo.getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR); - assertEquals("a1 a2 a3 a4", atomRefs4); - assertEquals("a1 a2 a3 a4", atomRefs4); - assertEquals(BondStereoValue.TRANS.toString(), bondStereo.getValue()); + assertEquals("1 2 3 4", atomRefsToIdStr(bondStereo.getAtomRefs4())); + assertEquals(BondStereoValue.TRANS, bondStereo.getBondStereoValue()); } @Test @@ -188,13 +177,10 @@ assertNotNull(atom3); Bond chiralBond = atom2.getBondToAtom(atom3); assertNotNull(chiralBond); - Element bondStereo = chiralBond.getBondStereo().toCML(); + BondStereo bondStereo = chiralBond.getBondStereo(); assertNotNull(bondStereo); - assertEquals(XmlDeclarations.CML_BONDSTEREO_EL, bondStereo.getLocalName()); - String atomRefs4 = bondStereo.getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR); - assertEquals("a1 a2 a3 a4", atomRefs4); - assertEquals("a1 a2 a3 a4", atomRefs4); - assertEquals(BondStereoValue.CIS.toString(), bondStereo.getValue()); + assertEquals("1 2 3 4", atomRefsToIdStr(bondStereo.getAtomRefs4())); + assertEquals(BondStereoValue.CIS, bondStereo.getBondStereoValue()); } @Test @@ -206,13 +192,10 @@ assertNotNull(atom3); Bond chiralBond = atom2.getBondToAtom(atom3); assertNotNull(chiralBond); - Element bondStereo = chiralBond.getBondStereo().toCML(); + BondStereo bondStereo = chiralBond.getBondStereo(); assertNotNull(bondStereo); - assertEquals(XmlDeclarations.CML_BONDSTEREO_EL, bondStereo.getLocalName()); - String atomRefs4 = 
bondStereo.getAttributeValue(XmlDeclarations.CML_ATOMREFS4_ATR); - assertEquals("a1 a2 a3 a4", atomRefs4); - assertEquals("a1 a2 a3 a4", atomRefs4); - assertEquals(BondStereoValue.TRANS.toString(), bondStereo.getValue()); + assertEquals("1 2 3 4", atomRefsToIdStr(bondStereo.getAtomRefs4())); + assertEquals(BondStereoValue.TRANS, bondStereo.getBondStereoValue()); } @@ -241,35 +224,33 @@ @Test public void testCIPpriority1() throws StructureBuildingException { - FragmentManager fm = new FragmentManager(sBuilder, new IDManager()); - Fragment f = sBuilder.build("C(Br)(F)([H])Cl", fm); - List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCIPOrder(); + Fragment f = fm.buildSMILES("C(Br)(F)([H])Cl"); + List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ - assertEquals("F", a.getElement()); + assertEquals(ChemEl.F, a.getElement()); } else if (i==2){ - assertEquals("Cl", a.getElement()); + assertEquals(ChemEl.Cl, a.getElement()); } else if (i==3){ - assertEquals("Br", a.getElement()); + assertEquals(ChemEl.Br, a.getElement()); } } } @Test public void testCIPpriority2() throws StructureBuildingException { - FragmentManager fm = new FragmentManager(sBuilder, new IDManager()); - Fragment f = sBuilder.build("C([H])(C1CC1)(C1CCC1)O", fm); - List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCIPOrder(); + Fragment f = fm.buildSMILES("C([H])(C1CC1)(C1CCC1)O"); + List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ assertEquals(3, a.getID()); @@ -278,7 +259,7 @@ assertEquals(6, a.getID()); } 
else if (i==3){ - assertEquals("O", a.getElement()); + assertEquals(ChemEl.O, a.getElement()); } } } @@ -286,13 +267,12 @@ @Test public void testCIPpriority3() throws StructureBuildingException { - FragmentManager fm = new FragmentManager(sBuilder, new IDManager()); - Fragment f = sBuilder.build("[C](N)(C1=CC(O)=CC=C1)([H])C2=CC=C(O)C=C2", fm); - List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCIPOrder(); + Fragment f = fm.buildSMILES("[C](N)(C1=CC(O)=CC=C1)([H])C2=CC=C(O)C=C2"); + List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ assertEquals(11, a.getID()); @@ -301,20 +281,19 @@ assertEquals(3, a.getID()); } else if (i==3){ - assertEquals("N", a.getElement()); + assertEquals(ChemEl.N, a.getElement()); } } } @Test public void testCIPpriority4() throws StructureBuildingException { - FragmentManager fm = new FragmentManager(sBuilder, new IDManager()); - Fragment f = sBuilder.build("[C](N)(C1CC(O)CCC1)([H])C2CCC(O)CC2", fm); - List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCIPOrder(); + Fragment f = fm.buildSMILES("[C](N)(C1CC(O)CCC1)([H])C2CCC(O)CC2"); + List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ assertEquals(11, a.getID()); @@ -323,20 +302,19 @@ assertEquals(3, a.getID()); } else if (i==3){ - assertEquals("N", a.getElement()); + assertEquals(ChemEl.N, a.getElement()); } } } @Test public void testCIPpriority5() throws StructureBuildingException { - FragmentManager fm = new FragmentManager(sBuilder, new IDManager()); - Fragment f = 
sBuilder.build("C1([H])(C(=O)O[H])C([H])([H])SC([H])([H])N([H])1", fm); - List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCIPOrder(); + Fragment f = fm.buildSMILES("C1([H])(C(=O)O[H])C([H])([H])SC([H])([H])N([H])1"); + List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ assertEquals(3, a.getID()); @@ -345,20 +323,19 @@ assertEquals(7, a.getID()); } else if (i==3){ - assertEquals("N", a.getElement()); + assertEquals(ChemEl.N, a.getElement()); } } } @Test public void testCIPpriority6() throws StructureBuildingException { - FragmentManager fm = new FragmentManager(sBuilder, new IDManager()); - Fragment f = sBuilder.build("C1([H])(O)C([H])(C([H])([H])[H])OC([H])([H])C([H])([H])C1([H])(O[H])", fm); - List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCIPOrder(); + Fragment f = fm.buildSMILES("C1([H])(O)C([H])(C([H])([H])[H])OC([H])([H])C([H])([H])C1([H])(O[H])"); + List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ assertEquals(17, a.getID()); @@ -367,20 +344,19 @@ assertEquals(4, a.getID()); } else if (i==3){ - assertEquals("O", a.getElement()); + assertEquals(ChemEl.O, a.getElement()); } } } @Test public void testCIPpriority7() throws StructureBuildingException { - FragmentManager fm = new FragmentManager(sBuilder, new IDManager()); - Fragment f = sBuilder.build("[H]OC2([H])(C([H])([H])C([H])([H])C3([H])(C4([H])(C([H])([H])C([H])([H])C1=C([H])C([H])([H])C([H])([H])C([H])([H])C1([H])C4([H])(C([H])([H])C([H])([H])C23(C([H])([H])[H])))))", fm); - List cipOrdered = new 
CipSequenceRules(f.getAtomList().get(34)).getNeighbouringAtomsInCIPOrder(); + Fragment f = fm.buildSMILES("[H]OC2([H])(C([H])([H])C([H])([H])C3([H])(C4([H])(C([H])([H])C([H])([H])C1=C([H])C([H])([H])C([H])([H])C([H])([H])C1([H])C4([H])(C([H])([H])C([H])([H])C23(C([H])([H])[H])))))"); + List cipOrdered = new CipSequenceRules(f.getAtomList().get(34)).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ assertEquals(37, a.getID()); @@ -397,14 +373,14 @@ @Test public void testCIPpriority8() throws StructureBuildingException { Fragment f = n2s.parseChemicalName("(6aR)-6-phenyl-6,6a-dihydroisoindolo[2,1-a]quinazoline-5,11-dione").getStructure(); - List cipOrdered = new CipSequenceRules(f.getAtomByLocant("6a")).getNeighbouringAtomsInCIPOrder(); + List cipOrdered = new CipSequenceRules(f.getAtomByLocant("6a")).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ - assertEquals("C", a.getElement()); + assertEquals(ChemEl.C, a.getElement()); } else if (i==2){ assertEquals("6", a.getFirstLocant()); @@ -417,14 +393,13 @@ @Test public void testCIPpriority9() throws StructureBuildingException { - BuildState state =new BuildState(mock(NameToStructureConfig.class), sBuilder); - Fragment f = state.fragManager.buildSMILES("C1(C=C)CC1C2=CC=CC=C2"); - StructureBuilder.makeHydrogensExplicit(state); - List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCIPOrder(); + Fragment f = fm.buildSMILES("C1(C=C)CC1C2=CC=CC=C2"); + fm.makeHydrogensExplicit(); + List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - 
assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ assertEquals(4, a.getID()); @@ -440,14 +415,13 @@ @Test public void testCIPpriority10() throws StructureBuildingException { - BuildState state =new BuildState(mock(NameToStructureConfig.class), sBuilder); - Fragment f = state.fragManager.buildSMILES("C(O[H])([H])(C1([H])C([H])(F)C([H])(Cl)C([H])([H])C([H])(I)C1([H])([H]))C1([H])C([H])(F)C([H])(Br)C([H])([H])C([H])(Cl)C1([H])([H])"); - StructureBuilder.makeHydrogensExplicit(state); - List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCIPOrder(); + Fragment f = fm.buildSMILES("C(O[H])([H])(C1([H])C([H])(F)C([H])(Cl)C([H])([H])C([H])(I)C1([H])([H]))C1([H])C([H])(F)C([H])(Br)C([H])([H])C([H])(Cl)C1([H])([H])"); + fm.makeHydrogensExplicit(); + List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ assertEquals(5, a.getID()); @@ -456,23 +430,22 @@ assertEquals(22, a.getID()); } else if (i==3){ - assertEquals("O", a.getElement()); + assertEquals(ChemEl.O, a.getElement()); } } } @Test public void testCIPpriority11() throws StructureBuildingException { - BuildState state =new BuildState(mock(NameToStructureConfig.class), sBuilder); - Fragment f = state.fragManager.buildSMILES("C17C=CC23C45OC6C19.O74.O2C3.C5.C6(C)C.C9"); - StructureBuilder.makeHydrogensExplicit(state); + Fragment f = fm.buildSMILES("C17C=CC23C45OC6C19.O74.O2C3.C5.C6(C)C.C9"); + fm.makeHydrogensExplicit(); //stereocentres at 1,4,5,7,8 List atomList = f.getAtomList(); - List cipOrdered = new CipSequenceRules(atomList.get(0)).getNeighbouringAtomsInCIPOrder(); + List cipOrdered = new CipSequenceRules(atomList.get(0)).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if 
(i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ assertEquals(2, a.getID()); @@ -481,10 +454,10 @@ assertEquals(8, a.getID()); } else if (i==3){ - assertEquals("O", a.getElement()); + assertEquals(ChemEl.O, a.getElement()); } } - cipOrdered = new CipSequenceRules(atomList.get(3)).getNeighbouringAtomsInCIPOrder(); + cipOrdered = new CipSequenceRules(atomList.get(3)).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ @@ -497,10 +470,10 @@ assertEquals(5, a.getID()); } else if (i==3){ - assertEquals("O", a.getElement()); + assertEquals(ChemEl.O, a.getElement()); } } - cipOrdered = new CipSequenceRules(atomList.get(4)).getNeighbouringAtomsInCIPOrder(); + cipOrdered = new CipSequenceRules(atomList.get(4)).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ @@ -516,11 +489,11 @@ assertEquals(9, a.getID()); } } - cipOrdered = new CipSequenceRules(atomList.get(6)).getNeighbouringAtomsInCIPOrder(); + cipOrdered = new CipSequenceRules(atomList.get(6)).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ assertEquals(13, a.getID()); @@ -529,14 +502,14 @@ assertEquals(8, a.getID()); } else if (i==3){ - assertEquals("O", a.getElement()); + assertEquals(ChemEl.O, a.getElement()); } } - cipOrdered = new CipSequenceRules(atomList.get(7)).getNeighbouringAtomsInCIPOrder(); + cipOrdered = new CipSequenceRules(atomList.get(7)).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ assertEquals(16, a.getID()); @@ -552,10 +525,9 @@ @Test public void testCIPpriority12() throws 
StructureBuildingException { - BuildState state =new BuildState(mock(NameToStructureConfig.class), sBuilder); - Fragment f = state.fragManager.buildSMILES("C1(C)(CCC(=O)N1)CCC(=O)NC(C)C"); - StructureBuilder.makeHydrogensExplicit(state); - List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCIPOrder(); + Fragment f = fm.buildSMILES("C1(C)(CCC(=O)N1)CCC(=O)NC(C)C"); + fm.makeHydrogensExplicit(); + List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ @@ -568,21 +540,20 @@ assertEquals(8, a.getID()); } else if (i==3){ - assertEquals("N", a.getElement()); + assertEquals(ChemEl.N, a.getElement()); } } } @Test public void testCIPpriority13() throws StructureBuildingException { - BuildState state =new BuildState(mock(NameToStructureConfig.class), sBuilder); - Fragment f = state.fragManager.buildSMILES("C(O)(C#CC)C1=CC=CC=C1"); - StructureBuilder.makeHydrogensExplicit(state); - List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCIPOrder(); + Fragment f = fm.buildSMILES("C(O)(C#CC)C1=CC=CC=C1"); + fm.makeHydrogensExplicit(); + List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCipOrder(); for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ - assertEquals("H", a.getElement()); + assertEquals(ChemEl.H, a.getElement()); } else if (i==1){ assertEquals(6, a.getID()); @@ -598,9 +569,8 @@ @Test public void testCIPpriority14() throws StructureBuildingException { - BuildState state =new BuildState(mock(NameToStructureConfig.class), sBuilder); - Fragment f = state.fragManager.buildSMILES("C(Cl)([2H])([3H])[H]"); - List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCIPOrder(); + Fragment f = fm.buildSMILES("C(Cl)([2H])([3H])[H]"); + List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCipOrder(); 
for (int i = 0; i < cipOrdered.size(); i++) { Atom a = cipOrdered.get(i); if (i==0){ @@ -619,11 +589,30 @@ } @Test + public void testCIPpriority15() throws StructureBuildingException { + Fragment f = fm.buildSMILES("C([H])(O)(C(C(F)CCl)CCBr)C(C(F)CF)CCI"); + fm.makeHydrogensExplicit(); + List cipOrdered = new CipSequenceRules(f.getFirstAtom()).getNeighbouringAtomsInCipOrder(); + assertEquals(4, cipOrdered.size()); + assertEquals(2, cipOrdered.get(0).getID()); + assertEquals(12, cipOrdered.get(1).getID()); + assertEquals(4, cipOrdered.get(2).getID()); + assertEquals(3, cipOrdered.get(3).getID()); + } + + @Test(expected=CipOrderingException.class) + public void testCipUnassignable() throws StructureBuildingException { + //two sides of ring are identical + Fragment f = fm.buildSMILES("NC1(O)CCC(CCC2CCCCC2)CC1"); + new CipSequenceRules(f.getAtomList().get(1)).getNeighbouringAtomsInCipOrder(); + } + + @Test public void testAtomParityEquivalence1() { - Atom a1= new Atom(1, "C", mock(Fragment.class)); - Atom a2= new Atom(2, "C", mock(Fragment.class)); - Atom a3= new Atom(3, "C", mock(Fragment.class)); - Atom a4= new Atom(4, "C", mock(Fragment.class)); + Atom a1= new Atom(1, ChemEl.C, mock(Fragment.class)); + Atom a2= new Atom(2, ChemEl.C, mock(Fragment.class)); + Atom a3= new Atom(3, ChemEl.C, mock(Fragment.class)); + Atom a4= new Atom(4, ChemEl.C, mock(Fragment.class)); Atom[] atomRefs1 = new Atom[]{a1,a2,a3,a4}; Atom[] atomRefs2 = new Atom[]{a3,a4,a1,a2}; //2 swaps (4 by bubble sort) @@ -633,10 +622,10 @@ @Test public void testAtomParityEquivalence2() { - Atom a1= new Atom(1, "C", mock(Fragment.class)); - Atom a2= new Atom(2, "C", mock(Fragment.class)); - Atom a3= new Atom(3, "C", mock(Fragment.class)); - Atom a4= new Atom(4, "C", mock(Fragment.class)); + Atom a1= new Atom(1, ChemEl.C, mock(Fragment.class)); + Atom a2= new Atom(2, ChemEl.C, mock(Fragment.class)); + Atom a3= new Atom(3, ChemEl.C, mock(Fragment.class)); + Atom a4= new Atom(4, ChemEl.C, 
mock(Fragment.class)); Atom[] atomRefs1 = new Atom[]{a1,a2,a3,a4}; Atom[] atomRefs2 = new Atom[]{a2,a4,a1,a3}; //3 swaps @@ -646,15 +635,13 @@ @Test public void testCisTransUnambiguous() throws StructureBuildingException { - FragmentManager fm = new FragmentManager(sBuilder, new IDManager()); - Fragment f = sBuilder.build("[H]C([H])([H])C([H])=C([H])C([H])([H])[H]", fm); + Fragment f = fm.buildSMILES("[H]C([H])([H])C([H])=C([H])C([H])([H])[H]"); assertEquals(true, StereochemistryHandler.cisTransUnambiguousOnBond(f.findBond(5, 7))); } @Test public void testCisTransAmbiguous() throws StructureBuildingException { - FragmentManager fm = new FragmentManager(sBuilder, new IDManager()); - Fragment f = sBuilder.build("[H]C([H])([H])C(Cl)=C([H])C([H])([H])[H]", fm); + Fragment f = fm.buildSMILES("[H]C([H])([H])C(Cl)=C([H])C([H])([H])[H]"); assertEquals(false, StereochemistryHandler.cisTransUnambiguousOnBond(f.findBond(5, 7))); } @@ -680,7 +667,6 @@ @Test public void testIsTetrahedral() throws StructureBuildingException { - FragmentManager fm = new FragmentManager(sBuilder, new IDManager()); assertEquals(true, StereoAnalyser.isKnownPotentiallyStereogenic(fm.buildSMILES("C(N)(O)(Cl)Br").getFirstAtom())); assertEquals(true, StereoAnalyser.isKnownPotentiallyStereogenic(fm.buildSMILES("[Si](N)(O)(Cl)Br").getFirstAtom())); assertEquals(true, StereoAnalyser.isKnownPotentiallyStereogenic(fm.buildSMILES("[Ge](N)(O)(Cl)Br").getFirstAtom())); @@ -704,7 +690,6 @@ @Test public void testAchiralDueToResonance() throws StructureBuildingException { - FragmentManager fm = new FragmentManager(sBuilder, new IDManager()); assertEquals(true, StereoAnalyser.isAchiralDueToResonanceOrTautomerism(fm.buildSMILES("[S](=N)(=O)([O-])Br").getFirstAtom())); assertEquals(true, StereoAnalyser.isAchiralDueToResonanceOrTautomerism(fm.buildSMILES("[S](=O)([O-])Br").getFirstAtom())); assertEquals(false, StereoAnalyser.isAchiralDueToResonanceOrTautomerism(fm.buildSMILES("[S](=S)([O-])Br").getFirstAtom())); @@ 
-713,7 +698,6 @@ @Test public void testAchiralDueToTautomerism() throws StructureBuildingException { - FragmentManager fm = new FragmentManager(sBuilder, new IDManager()); assertEquals(true, StereoAnalyser.isAchiralDueToResonanceOrTautomerism(fm.buildSMILES("[S](=N)(=O)([OH])Br").getFirstAtom())); assertEquals(true, StereoAnalyser.isAchiralDueToResonanceOrTautomerism(fm.buildSMILES("[S](=O)([OH])Br").getFirstAtom())); assertEquals(false, StereoAnalyser.isAchiralDueToResonanceOrTautomerism(fm.buildSMILES("[S](=S)([OH])Br").getFirstAtom())); @@ -724,9 +708,8 @@ @Test public void testFindPseudoAsymmetricCarbon1() throws StructureBuildingException { - BuildState state =new BuildState(mock(NameToStructureConfig.class), sBuilder); - Fragment f = state.fragManager.buildSMILES("OCC(O)C(O)C(O)CO"); - StructureBuilder.makeHydrogensExplicit(state); + Fragment f = fm.buildSMILES("OCC(O)C(O)C(O)CO"); + fm.makeHydrogensExplicit(); StereoAnalyser stereoAnalyser = new StereoAnalyser(f); List stereoCentres = stereoAnalyser.findStereoCentres(); assertEquals(3, stereoCentres.size()); @@ -744,9 +727,8 @@ @Test public void testFindPseudoAsymmetricCarbon2() throws StructureBuildingException { - BuildState state =new BuildState(mock(NameToStructureConfig.class), sBuilder); - Fragment f = state.fragManager.buildSMILES("OCC(O)C(C(Cl)(Br)C)(C(Cl)(Br)C)C(O)CO"); - StructureBuilder.makeHydrogensExplicit(state); + Fragment f = fm.buildSMILES("OCC(O)C(C(Cl)(Br)C)(C(Cl)(Br)C)C(O)CO"); + fm.makeHydrogensExplicit(); StereoAnalyser stereoAnalyser = new StereoAnalyser(f); List stereoCentres = stereoAnalyser.findStereoCentres(); assertEquals(5, stereoCentres.size()); @@ -762,5 +744,14 @@ } } - + private String atomRefsToIdStr(Atom[] atomRefs4) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < atomRefs4.length; i++) { + sb.append(atomRefs4[i].getID()); + if (i + 1 < atomRefs4.length) { + sb.append(' '); + } + } + return sb.toString(); + } } diff -Nru 
opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingMethodsTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingMethodsTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingMethodsTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/StructureBuildingMethodsTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -4,19 +4,17 @@ import org.junit.Test; -import nu.xom.Attribute; -import nu.xom.Element; import static uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; -import static junit.framework.Assert.assertEquals; +import static org.junit.Assert.*; import static org.mockito.Mockito.mock; public class StructureBuildingMethodsTest { @Test public void bracketedPrimeNotSpecialCase() { - Element word = new Element(WORD_EL); - Element substituent = new Element(SUBSTITUENT_EL); - word.appendChild(substituent); + Element word = new GroupingEl(WORD_EL); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + word.addChild(substituent); assertEquals(null, StructureBuildingMethods.checkForBracketedPrimedLocantSpecialCase(substituent, "4")); assertEquals(null, StructureBuildingMethods.checkForBracketedPrimedLocantSpecialCase(substituent, "4'")); assertEquals(null, StructureBuildingMethods.checkForBracketedPrimedLocantSpecialCase(substituent, "4''")); @@ -24,11 +22,11 @@ @Test public void bracketedPrimeSpecialCase1() { - Element word = new Element(WORD_EL); - Element bracket = new Element(BRACKET_EL); - word.appendChild(bracket); - Element substituent = new Element(SUBSTITUENT_EL); - bracket.appendChild(substituent); + Element word = new GroupingEl(WORD_EL); + Element bracket = new GroupingEl(BRACKET_EL); + word.addChild(bracket); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + bracket.addChild(substituent); assertEquals(null, StructureBuildingMethods.checkForBracketedPrimedLocantSpecialCase(substituent, "4")); assertEquals("4", 
StructureBuildingMethods.checkForBracketedPrimedLocantSpecialCase(substituent, "4'")); assertEquals(null, StructureBuildingMethods.checkForBracketedPrimedLocantSpecialCase(substituent, "4''")); @@ -40,13 +38,13 @@ @Test public void bracketedPrimeSpecialCase2() { - Element word = new Element(WORD_EL); - Element bracket = new Element(BRACKET_EL); - word.appendChild(bracket); - Element bracket2 = new Element(BRACKET_EL); - bracket.appendChild(bracket2); - Element substituent = new Element(SUBSTITUENT_EL); - bracket2.appendChild(substituent); + Element word = new GroupingEl(WORD_EL); + Element bracket = new GroupingEl(BRACKET_EL); + word.addChild(bracket); + Element bracket2 = new GroupingEl(BRACKET_EL); + bracket.addChild(bracket2); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + bracket2.addChild(substituent); assertEquals(null, StructureBuildingMethods.checkForBracketedPrimedLocantSpecialCase(substituent, "4")); assertEquals(null, StructureBuildingMethods.checkForBracketedPrimedLocantSpecialCase(substituent, "4'")); assertEquals("4", StructureBuildingMethods.checkForBracketedPrimedLocantSpecialCase(substituent, "4''")); @@ -59,79 +57,79 @@ @Test public void notPhosphoSubstitution() throws StructureBuildingException { //standard unlocanted substitution - BuildState state = new BuildState(mock(NameToStructureConfig.class), new SMILESFragmentBuilder()); - Element word = new Element(WORD_EL); + BuildState state = new BuildState(mock(NameToStructureConfig.class)); + Element word = new GroupingEl(WORD_EL); - Element amino = new Element(GROUP_EL); + Element amino = new TokenEl(GROUP_EL); Fragment aminoFrag = state.fragManager.buildSMILES("-N"); - state.xmlFragmentMap.put(amino, aminoFrag); - Element substituent = new Element(SUBSTITUENT_EL); - substituent.appendChild(amino); - - Element methanol = new Element(GROUP_EL); - state.xmlFragmentMap.put(methanol, state.fragManager.buildSMILES("CO")); - Element root = new Element(ROOT_EL); - root.appendChild(methanol); + 
amino.setFrag(aminoFrag); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + substituent.addChild(amino); + + Element methanol = new TokenEl(GROUP_EL); + methanol.setFrag(state.fragManager.buildSMILES("CO")); + Element root = new GroupingEl(ROOT_EL); + root.addChild(methanol); - word.appendChild(substituent); - word.appendChild(root); + word.addChild(substituent); + word.addChild(root); StructureBuildingMethods.resolveRootOrSubstituentUnLocanted(state, substituent); Set interFragmentBonds = state.fragManager.getInterFragmentBonds(aminoFrag); assertEquals(1, interFragmentBonds.size()); - assertEquals("C", interFragmentBonds.iterator().next().getOtherAtom(aminoFrag.getFirstAtom()).getElement()); + assertEquals(ChemEl.C, interFragmentBonds.iterator().next().getOtherAtom(aminoFrag.getFirstAtom()).getElement()); } @Test public void phosphoUnlocantedSubstitution() throws StructureBuildingException { - BuildState state = new BuildState(mock(NameToStructureConfig.class), new SMILESFragmentBuilder()); - Element word = new Element(WORD_EL); + BuildState state = new BuildState(mock(NameToStructureConfig.class)); + Element word = new GroupingEl(WORD_EL); - Element phospho = new Element(GROUP_EL); + Element phospho = new TokenEl(GROUP_EL); phospho.addAttribute(new Attribute(SUBTYPE_ATR, PHOSPHO_SUBTYPE_VAL)); Fragment phosphoFrag = state.fragManager.buildSMILES("-P(=O)O"); - state.xmlFragmentMap.put(phospho, phosphoFrag); - Element substituent = new Element(SUBSTITUENT_EL); - substituent.appendChild(phospho); - - Element methanol = new Element(GROUP_EL); - state.xmlFragmentMap.put(methanol, state.fragManager.buildSMILES("CO")); - Element root = new Element(ROOT_EL); - root.appendChild(methanol); + phospho.setFrag(phosphoFrag); + Element substituent = new GroupingEl(SUBSTITUENT_EL); + substituent.addChild(phospho); + + Element methanol = new TokenEl(GROUP_EL); + methanol.setFrag(state.fragManager.buildSMILES("CO")); + Element root = new GroupingEl(ROOT_EL); + 
root.addChild(methanol); - word.appendChild(substituent); - word.appendChild(root); + word.addChild(substituent); + word.addChild(root); StructureBuildingMethods.resolveRootOrSubstituentUnLocanted(state, substituent); Set interFragmentBonds = state.fragManager.getInterFragmentBonds(phosphoFrag); assertEquals(1, interFragmentBonds.size()); - assertEquals("O", interFragmentBonds.iterator().next().getOtherAtom(phosphoFrag.getFirstAtom()).getElement()); + assertEquals(ChemEl.O, interFragmentBonds.iterator().next().getOtherAtom(phosphoFrag.getFirstAtom()).getElement()); } @Test public void phosphoLocantedSubstitution() throws StructureBuildingException { - BuildState state = new BuildState(mock(NameToStructureConfig.class), new SMILESFragmentBuilder()); - Element word = new Element(WORD_EL); + BuildState state = new BuildState(mock(NameToStructureConfig.class)); + Element word = new GroupingEl(WORD_EL); - Element phospho = new Element(GROUP_EL); + Element phospho = new TokenEl(GROUP_EL); phospho.addAttribute(new Attribute(SUBTYPE_ATR, PHOSPHO_SUBTYPE_VAL)); Fragment phosphoFrag = state.fragManager.buildSMILES("-P(=O)O"); - state.xmlFragmentMap.put(phospho, phosphoFrag); - Element substituent = new Element(SUBSTITUENT_EL); + phospho.setFrag(phosphoFrag); + Element substituent = new GroupingEl(SUBSTITUENT_EL); substituent.addAttribute(new Attribute(LOCANT_ATR, "4")); - substituent.appendChild(phospho); + substituent.addChild(phospho); - Element methanol = new Element(GROUP_EL); - state.xmlFragmentMap.put(methanol, state.fragManager.buildSMILES("CCCCO","group","1/2/3/4/")); - Element root = new Element(ROOT_EL); - root.appendChild(methanol); + Element methanol = new TokenEl(GROUP_EL); + methanol.setFrag(state.fragManager.buildSMILES("CCCCO",methanol,"1/2/3/4/")); + Element root = new GroupingEl(ROOT_EL); + root.addChild(methanol); - word.appendChild(substituent); - word.appendChild(root); + word.addChild(substituent); + word.addChild(root); 
StructureBuildingMethods.resolveRootOrSubstituentLocanted(state, substituent); Set interFragmentBonds = state.fragManager.getInterFragmentBonds(phosphoFrag); assertEquals(1, interFragmentBonds.size()); - assertEquals("O", interFragmentBonds.iterator().next().getOtherAtom(phosphoFrag.getFirstAtom()).getElement()); + assertEquals(ChemEl.O, interFragmentBonds.iterator().next().getOtherAtom(phosphoFrag.getFirstAtom()).getElement()); } } diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/TokenizerTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/TokenizerTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/TokenizerTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/TokenizerTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,6 +1,6 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.assertEquals; +import static org.junit.Assert.*; import java.io.IOException; import java.util.List; diff -Nru opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/VerifyFragmentsTest.java opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/VerifyFragmentsTest.java --- opsin-1.5.0/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/VerifyFragmentsTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-core/src/test/java/uk/ac/cam/ch/wwmm/opsin/VerifyFragmentsTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,64 +1,109 @@ package uk.ac.cam.ch.wwmm.opsin; -import static junit.framework.Assert.*; - -import java.util.ArrayList; -import java.util.List; - -import nu.xom.Document; -import nu.xom.Element; -import nu.xom.Elements; +import javax.xml.stream.XMLStreamConstants; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamReader; +import org.junit.AfterClass; +import org.junit.BeforeClass; import org.junit.Test; + +import static org.junit.Assert.*; import static 
uk.ac.cam.ch.wwmm.opsin.XmlDeclarations.*; public class VerifyFragmentsTest { - private static final String RESOURCE_LOCATION = "uk/ac/cam/ch/wwmm/opsin/resources/"; - private static final ResourceGetter resourceGetter = new ResourceGetter(RESOURCE_LOCATION); + + private static ResourceGetter resourceGetter; + private static SMILESFragmentBuilder sBuilder; + + @BeforeClass + public static void setUp() { + resourceGetter = new ResourceGetter("uk/ac/cam/ch/wwmm/opsin/resources/"); + sBuilder = new SMILESFragmentBuilder(new IDManager()); + } + + @AfterClass + public static void cleanUp(){ + resourceGetter = null; + sBuilder = null; + } @Test public void verifySMILES() throws Exception { - FragmentManager fm = new FragmentManager(new SMILESFragmentBuilder(), new IDManager()); - Document tokenFileDoc = resourceGetter.getXMLDocument("index.xml"); - Elements tokenFiles = tokenFileDoc.getRootElement().getChildElements(); - for (int i = 0; i < tokenFiles.size(); i++) { - Element rootElement = resourceGetter.getXMLDocument(tokenFiles.get(i).getValue()).getRootElement(); - List tokenLists =new ArrayList(); - if (rootElement.getLocalName().equals("tokenLists")){//support for xml files with one "tokenList" or multiple "tokenList" under a "tokenLists" element - Elements children =rootElement.getChildElements(); - for (int j = 0; j opsin uk.ac.cam.ch.opsin - 1.5.0 + 2.3.1 opsin-inchi OPSIN_InChI_Support diff -Nru opsin-1.5.0/opsin-inchi/src/main/java/uk/ac/cam/ch/wwmm/opsin/InchiPruner.java opsin-2.3.1/opsin-inchi/src/main/java/uk/ac/cam/ch/wwmm/opsin/InchiPruner.java --- opsin-1.5.0/opsin-inchi/src/main/java/uk/ac/cam/ch/wwmm/opsin/InchiPruner.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/main/java/uk/ac/cam/ch/wwmm/opsin/InchiPruner.java 2017-07-23 20:55:18.000000000 +0000 @@ -11,7 +11,7 @@ * stereochemistry, fixed hydrogen and reconnected layers have been removed * The S indicating standard InChI is also removed * @param inchi - * @return + * @return 
InChI just containing the c,h,q,p,i layers */ public static String mainAndChargeLayers(String inchi){ String[] inchiLayers = inchi.split("/"); @@ -20,7 +20,7 @@ } List retainedLayers = new ArrayList(); if (Character.isLetter(inchiLayers[0].charAt(inchiLayers[0].length() -1))){//remove the S indicating this to be a standard InChI - inchiLayers[0]=inchiLayers[0].substring(0, inchiLayers[0].length() -1); + inchiLayers[0] = inchiLayers[0].substring(0, inchiLayers[0].length() -1); } retainedLayers.add(inchiLayers[0]);//version identifier retainedLayers.add(inchiLayers[1]);//molecular formula @@ -42,7 +42,7 @@ * fixed hydrogen and reconnected layers have been removed * The S indicating standard InChI is also removed * @param inchi - * @return + * @return InChI just containing the c,h,q,p,b,t,m,s,i layers */ public static String mainChargeAndStereochemistryLayers(String inchi){ String[] inchiLayers = inchi.split("/"); @@ -51,7 +51,7 @@ } List retainedLayers = new ArrayList(); if (Character.isLetter(inchiLayers[0].charAt(inchiLayers[0].length() -1))){//remove the S indicating this to be a standard InChI - inchiLayers[0]=inchiLayers[0].substring(0, inchiLayers[0].length() -1); + inchiLayers[0] = inchiLayers[0].substring(0, inchiLayers[0].length() -1); } retainedLayers.add(inchiLayers[0]);//version identifier retainedLayers.add(inchiLayers[1]);//molecular formula diff -Nru opsin-1.5.0/opsin-inchi/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToInchi.java opsin-2.3.1/opsin-inchi/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToInchi.java --- opsin-1.5.0/opsin-inchi/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToInchi.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/main/java/uk/ac/cam/ch/wwmm/opsin/NameToInchi.java 2017-07-23 20:55:18.000000000 +0000 @@ -8,7 +8,6 @@ import org.apache.log4j.Logger; import uk.ac.cam.ch.wwmm.opsin.BondStereo.BondStereoValue; - import net.sf.jniinchi.INCHI_BOND_TYPE; import net.sf.jniinchi.INCHI_OPTION; import 
net.sf.jniinchi.INCHI_PARITY; @@ -18,12 +17,13 @@ import net.sf.jniinchi.JniInchiException; import net.sf.jniinchi.JniInchiInput; import net.sf.jniinchi.JniInchiOutput; +import net.sf.jniinchi.JniInchiOutputKey; import net.sf.jniinchi.JniInchiStereo0D; import net.sf.jniinchi.JniInchiWrapper; /** - * Allows the conversion of OPSIN's output into (Std)InChIs - * Also can be used as wrapper to directly convert chemical names to (Std)InChIs + * Allows the conversion of OPSIN's output into (Std)InChIs or StdInChIKeys + * Also can be used, as a convenience method, to directly convert chemical names to (Std)InChIs or StdInChIKeys * @author dl387 * */ @@ -57,6 +57,17 @@ return convertResultToStdInChI(result); } + /**Parses a chemical name, returning a StdInChIKey for the molecule. + * Like StdInChI, StdInChIKeys aim to not be tautomer specific + * + * @param name The chemical name to parse. + * @return A StdInChIKey string or null if the molecule would not parse. + */ + public String parseToStdInchiKey(String name) { + OpsinResult result = n2s.parseChemicalName(name); + return convertResultToStdInChIKey(result); + } + /** * Converts an OPSIN result to InChI. Null is returned if this conversion fails * @param result @@ -77,8 +88,30 @@ return convertResultToInChI(result, true); } + /** + * Converts an OPSIN result to a StdInChIKey. 
Null is returned if this conversion fails + * Like StdInChI, StdInChIKeys aim to not be tautomer specific + * @param result + * @return String InChIKey + */ + public static String convertResultToStdInChIKey(OpsinResult result){ + String stdInchi = convertResultToInChI(result, true); + if (stdInchi != null){ + try { + JniInchiOutputKey key = JniInchiWrapper.getInchiKey(stdInchi); + return key.getKey(); + } catch (Exception e) { + if (LOG.isDebugEnabled()){ + LOG.debug(e.getMessage(), e); + } + return null; + } + } + return null; + } + private static String convertResultToInChI(OpsinResult result, boolean produceStdInChI){ - if (result.getStructure() !=null){ + if (result.getStructure() != null){ String inchi = null; try{ inchi = opsinFragmentToInchi(result.getStructure(), produceStdInChI); @@ -101,22 +134,20 @@ return null; } - private static String opsinFragmentToInchi(Fragment frag, boolean produceStdInChI) throws JniInchiException{ + private static String opsinFragmentToInchi(Fragment frag, boolean produceStdInChI) throws JniInchiException { HashMap opsinIdAtomMap = new HashMap(); JniInchiInput input; - if (produceStdInChI){ - input = new JniInchiInput(); - } - else{ - List options = new ArrayList(); + List options = new ArrayList(); + options.add(INCHI_OPTION.AuxNone); + if (!produceStdInChI){ options.add(INCHI_OPTION.FixedH); - input = new JniInchiInput(options); } + input = new JniInchiInput(options); List atomList =frag.getAtomList(); // Generate atoms for (Atom atom : atomList) { - JniInchiAtom jAtom = input.addAtom(new JniInchiAtom(0.0, 0.0, 0.0, atom.getElement())); + JniInchiAtom jAtom = input.addAtom(new JniInchiAtom(0.0, 0.0, 0.0, atom.getElement().toString())); jAtom.setCharge(atom.getCharge()); Integer isotope = atom.getIsotope(); if (isotope !=null){ diff -Nru opsin-1.5.0/opsin-inchi/src/test/java/uk/ac/cam/ch/wwmm/opsin/InchiOutputTest.java opsin-2.3.1/opsin-inchi/src/test/java/uk/ac/cam/ch/wwmm/opsin/InchiOutputTest.java --- 
opsin-1.5.0/opsin-inchi/src/test/java/uk/ac/cam/ch/wwmm/opsin/InchiOutputTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/java/uk/ac/cam/ch/wwmm/opsin/InchiOutputTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,6 +1,6 @@ package uk.ac.cam.ch.wwmm.opsin; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.*; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -23,21 +23,29 @@ @Test public void testStaticToInChI() throws StructureBuildingException{ - FragmentManager fm = new FragmentManager(new SMILESFragmentBuilder(), new IDManager()); - Fragment f = fm.buildSMILES("C([H])([H])([H])C(=O)N([H])[H]"); + SMILESFragmentBuilder sBuilder = new SMILESFragmentBuilder(new IDManager()); + Fragment f = sBuilder.build("C([H])([H])([H])C(=O)N([H])[H]"); OpsinResult result = new OpsinResult(f, OPSIN_RESULT_STATUS.SUCCESS, "", ""); assertEquals("InChI=1/C2H5NO/c1-2(3)4/h1H3,(H2,3,4)/f/h3H2", NameToInchi.convertResultToInChI(result)); } @Test public void testStaticToStdInChI() throws StructureBuildingException{ - FragmentManager fm = new FragmentManager(new SMILESFragmentBuilder(), new IDManager()); - Fragment f = fm.buildSMILES("C([H])([H])([H])C(=O)N([H])[H]"); + SMILESFragmentBuilder sBuilder = new SMILESFragmentBuilder(new IDManager()); + Fragment f = sBuilder.build("C([H])([H])([H])C(=O)N([H])[H]"); OpsinResult result = new OpsinResult(f, OPSIN_RESULT_STATUS.SUCCESS, "", ""); assertEquals("InChI=1S/C2H5NO/c1-2(3)4/h1H3,(H2,3,4)", NameToInchi.convertResultToStdInChI(result)); } @Test + public void testStaticToStdInChIKey() throws StructureBuildingException{ + SMILESFragmentBuilder sBuilder = new SMILESFragmentBuilder(new IDManager()); + Fragment f = sBuilder.build("C([H])([H])([H])C(=O)N([H])[H]"); + OpsinResult result = new OpsinResult(f, OPSIN_RESULT_STATUS.SUCCESS, "", ""); + assertEquals("DLFVBJFMPXGRIB-UHFFFAOYSA-N", NameToInchi.convertResultToStdInChIKey(result)); + } + + @Test public void 
testParseToInChI(){ assertEquals("InChI=1/C2H5NO/c1-2(3)4/h1H3,(H2,3,4)/f/h3H2", n2i.parseToInchi("acetamide")); } @@ -47,4 +55,9 @@ public void testParseToStdInChI(){ assertEquals("InChI=1S/C2H5NO/c1-2(3)4/h1H3,(H2,3,4)", n2i.parseToStdInchi("acetamide")); } + + @Test + public void testParseToStdInChIKey(){ + assertEquals("DLFVBJFMPXGRIB-UHFFFAOYSA-N", n2i.parseToStdInchiKey("acetamide")); + } } diff -Nru opsin-1.5.0/opsin-inchi/src/test/java/uk/ac/cam/ch/wwmm/opsin/NomenclatureIntegrationTest.java opsin-2.3.1/opsin-inchi/src/test/java/uk/ac/cam/ch/wwmm/opsin/NomenclatureIntegrationTest.java --- opsin-1.5.0/opsin-inchi/src/test/java/uk/ac/cam/ch/wwmm/opsin/NomenclatureIntegrationTest.java 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/java/uk/ac/cam/ch/wwmm/opsin/NomenclatureIntegrationTest.java 2017-07-23 20:55:18.000000000 +0000 @@ -1,16 +1,19 @@ package uk.ac.cam.ch.wwmm.opsin; -import static org.junit.Assert.fail; + +import static org.junit.Assert.*; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; +import org.apache.commons.io.IOUtils; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; public class NomenclatureIntegrationTest { private static NameToStructure n2s; + @BeforeClass public static void setUp() { n2s = NameToStructure.getInstance(); @@ -30,12 +33,44 @@ } @Test + public void testAcetals() throws IOException{ + NameToStructureConfig n2sConfig = NameToStructureConfig.getDefaultConfigInstance(); + n2sConfig.setAllowRadicals(true); + String file = "acetals.txt"; + checkNamesAgainstInChIs(file, n2sConfig); + } + + @Test + public void testAlcoholEsters() throws IOException{ + NameToStructureConfig n2sConfig = NameToStructureConfig.getDefaultConfigInstance(); + n2sConfig.setAllowRadicals(true); + String file = "alcoholEsters.txt"; + checkNamesAgainstInChIs(file, n2sConfig); + } + + @Test public void testCarbohydrates() throws IOException{ 
NameToStructureConfig n2sConfig = NameToStructureConfig.getDefaultConfigInstance(); n2sConfig.setAllowRadicals(true); String file = "carbohydrates.txt"; checkNamesAgainstInChIs(file, n2sConfig); } + + @Test + public void testChargeBalancing() throws IOException{ + NameToStructureConfig n2sConfig = NameToStructureConfig.getDefaultConfigInstance(); + n2sConfig.setAllowRadicals(true); + String file = "chargeBalancing.txt"; + checkNamesAgainstInChIs(file, n2sConfig); + } + + @Test + public void testConjunctiveNomenclature() throws IOException{ + NameToStructureConfig n2sConfig = NameToStructureConfig.getDefaultConfigInstance(); + n2sConfig.setAllowRadicals(true); + String file = "conjunctiveNomenclature.txt"; + checkNamesAgainstInChIs(file, n2sConfig); + } @Test public void testCyclicSuffixes() throws IOException{ @@ -62,6 +97,22 @@ } @Test + public void testIsotopes() throws IOException{ + NameToStructureConfig n2sConfig = NameToStructureConfig.getDefaultConfigInstance(); + n2sConfig.setAllowRadicals(true); + String file = "isotopes.txt"; + checkNamesAgainstInChIs(file, n2sConfig); + } + + @Test + public void testAdditiveNomenclature() throws IOException{ + NameToStructureConfig n2sConfig = NameToStructureConfig.getDefaultConfigInstance(); + n2sConfig.setAllowRadicals(true); + String file = "additiveNomenclature.txt"; + checkNamesAgainstInChIs(file, n2sConfig); + } + + @Test public void testMultiplicativeNomenclature() throws IOException{ NameToStructureConfig n2sConfig = NameToStructureConfig.getDefaultConfigInstance(); n2sConfig.setAllowRadicals(true); @@ -94,6 +145,14 @@ } @Test + public void testInorganicNomenclature() throws IOException{ + NameToStructureConfig n2sConfig = NameToStructureConfig.getDefaultConfigInstance(); + n2sConfig.setAllowRadicals(true); + String file = "inorganics.txt"; + checkNamesAgainstInChIs(file, n2sConfig); + } + + @Test public void testIonNomenclature() throws IOException{ NameToStructureConfig n2sConfig = 
NameToStructureConfig.getDefaultConfigInstance(); n2sConfig.setAllowRadicals(true); @@ -124,6 +183,14 @@ String file = "implicitBracketting.txt"; checkNamesAgainstInChIs(file, n2sConfig); } + + @Test + public void testStereochemistry() throws IOException{ + NameToStructureConfig n2sConfig = NameToStructureConfig.getDefaultConfigInstance(); + n2sConfig.setAllowRadicals(true); + String file = "stereochemistry.txt"; + checkNamesAgainstInChIs(file, n2sConfig); + } @Test public void testMiscellany() throws IOException{ @@ -134,7 +201,7 @@ } private void checkNamesAgainstInChIs(String file, NameToStructureConfig n2sConfig) throws IOException{ - BufferedReader input = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(file))); + BufferedReader input = new BufferedReader(new InputStreamReader(this.getClass().getResourceAsStream(file), "UTF-8")); try { String line = null; while ((line = input.readLine()) != null) { @@ -155,7 +222,7 @@ } } } finally { - input.close(); + IOUtils.closeQuietly(input); } } } diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/acetals.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/acetals.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/acetals.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/acetals.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,20 @@ +propanal dimethyl acetal InChI=1S/C5H12O2/c1-4-5(6-2)7-3/h5H,4H2,1-3H3 +propanal diethyl acetal InChI=1S/C7H16O2/c1-4-7(8-5-2)9-6-3/h7H,4-6H2,1-3H3 +cyclohexanone ethyl methyl ketal InChI=1S/C9H18O2/c1-3-11-9(10-2)7-5-4-6-8-9/h3-8H2,1-2H3 +cyclohexane-1,4-dione 1-ethyl 1,4,4-trimethyl diketal InChI=1S/C11H22O4/c1-5-15-11(14-4)8-6-10(12-2,13-3)7-9-11/h5-9H2,1-4H3 +2-methylcyclohexane-1,4-dione 1,1-diethyl 4,4-dichloro diketal InChI=1S/C11H20Cl2O4/c1-4-14-11(15-5-2)7-6-10(16-12,17-13)8-9(11)3/h9H,4-8H2,1-3H3 +propanal ethylene acetal 
InChI=1S/C5H10O2/c1-2-5-6-3-4-7-5/h5H,2-4H2,1H3 +cyclohexanone ethylene ketal InChI=1S/C8H14O2/c1-2-4-8(5-3-1)9-6-7-10-8/h1-7H2 +3-(trimethylsilyl)propanal ethylene ketal InChI=1S/C8H18O2Si/c1-11(2,3)7-4-8-9-5-6-10-8/h8H,4-7H2,1-3H3 +butanal ethyl hemiacetal InChI=1S/C6H14O2/c1-3-5-6(7)8-4-2/h6-7H,3-5H2,1-2H3 +cyclohexanone methyl hemiketal InChI=1S/C7H14O2/c1-9-7(8)5-3-2-4-6-7/h8H,2-6H2,1H3 +pentanal diethyl dithioacetal InChI=1S/C9H20S2/c1-4-7-8-9(10-5-2)11-6-3/h9H,4-8H2,1-3H3 +propanal S-ethyl O-methyl monothioacetal InChI=1S/C6H14OS/c1-4-6(7-3)8-5-2/h6H,4-5H2,1-3H3 +cyclopentanone diethyl monothioketal InChI=1S/C9H18OS/c1-3-10-9(11-4-2)7-5-6-8-9/h3-8H2,1-2H3 +ethan-1-one ethylene monothioketal InChI=1S/C4H8OS/c1-4-5-2-3-6-4/h4H,2-3H2,1H3 +cyclohexanone Se-ethyl S-methyl selenothioketal InChI=1S/C9H18SSe/c1-3-11-9(10-2)7-5-4-6-8-9/h3-8H2,1-2H3 +cyclopentanone ethylene monoselenoketal InChI=1S/C7H12OSe/c1-2-4-7(3-1)8-5-6-9-7/h1-6H2 +propanal ethyl dithiohemiacetal InChI=1S/C5H12S2/c1-3-5(6)7-4-2/h5-6H,3-4H2,1-2H3 +propanal O-ethyl monothiohemiacetal InChI=1S/C5H12OS/c1-3-5(7)6-4-2/h5,7H,3-4H2,1-2H3 +cyclopentanone S-ethyl selenothiohemiketal InChI=1S/C7H14SSe/c1-2-8-7(9)5-3-4-6-7/h9H,2-6H2,1H3 +D-glucose diethyl mercaptal InChI=1S/C10H22O5S2/c1-3-16-10(17-4-2)9(15)8(14)7(13)6(12)5-11/h6-15H,3-5H2,1-2H3/t6-,7-,8+,9-/m1/s1 diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/additiveNomenclature.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/additiveNomenclature.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/additiveNomenclature.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/additiveNomenclature.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,4 @@ +methylsulfonylbenzene InChI=1S/C7H8O2S/c1-10(8,9)7-5-3-2-4-6-7/h2-6H,1H3 +methylsulfonamidobenzene InChI=1S/C7H9NO2S/c1-11(9,10)8-7-5-3-2-4-6-7/h2-6,8H,1H3 
+2-(N-(2-ethylphenyl)methylsulfonamido)-acetamide InChI=1S/C11H16N2O3S/c1-3-9-6-4-5-7-10(9)13(8-11(12)14)17(2,15)16/h4-7H,3,8H2,1-2H3,(H2,12,14) +methylcarbonylbenzene InChI=1S/C8H8O/c1-7(9)8-5-3-2-4-6-8/h2-6H,1H3 diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/alcoholEsters.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/alcoholEsters.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/alcoholEsters.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/alcoholEsters.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,13 @@ +phenol acetate InChI=1S/C8H8O2/c1-7(9)10-8-5-3-2-4-6-8/h2-6H,1H3 +phenol acetate (1:1) InChI=1S/C6H6O.C2H4O2/c7-6-4-2-1-3-5-6;1-2(3)4/h1-5,7H;1H3,(H,3,4) +adenosine triphosphate InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-,10-/m1/s1 +adenosine 5'-triphosphate InChI=1S/C10H16N5O13P3/c11-8-5-9(13-2-12-8)15(3-14-5)10-7(17)6(16)4(26-10)1-25-30(21,22)28-31(23,24)27-29(18,19)20/h2-4,6-7,10,16-17H,1H2,(H,21,22)(H,23,24)(H2,11,12,13)(H2,18,19,20)/t4-,6-,7-,10-/m1/s1 +choline phosphate InChI=1S/C5H14NO4P/c1-6(2,3)4-5-10-11(7,8)9/h4-5H2,1-3H3,(H-,7,8,9)/p+1 +L-histidinol phosphate InChI=1S/C6H12N3O4P/c7-5(3-13-14(10,11)12)1-6-2-8-4-9-6/h2,4-5H,1,3,7H2,(H,8,9)(H2,10,11,12)/t5-/m0/s1 +1,2-Ethanediol 1-(4-methylbenzenesulfonate) InChI=1S/C9H12O4S/c1-8-2-4-9(5-3-8)14(11,12)13-7-6-10/h2-5,10H,6-7H2,1H3 +//counter examples +glycinium acetate InChI=1S/C2H5NO2.C2H4O2/c3-1-2(4)5;1-2(3)4/h1,3H2,(H,4,5);1H3,(H,3,4) +D-tryptophanol oxalate InChI=1S/C11H14N2O.C2H2O4/c12-9(7-14)5-8-6-13-11-4-2-1-3-10(8)11;3-1(4)2(5)6/h1-4,6,9,13-14H,5,7,12H2;(H,3,4)(H,5,6)/t9-;/m1./s1 +lysine acetate InChI=1S/C6H14N2O2.C2H4O2/c7-4-2-1-3-5(8)6(9)10;1-2(3)4/h5H,1-4,7-8H2,(H,9,10);1H3,(H,3,4)/t5-;/m0./s1 
+piperidin-4-ol trifluoroacetate InChI=1S/C5H11NO.C2HF3O2/c7-5-1-3-6-4-2-5;3-2(4,5)1(6)7/h5-7H,1-4H2;(H,6,7) +piperidin-4-ol 2,2,2-trifluoroacetate InChI=1S/C5H11NO.C2HF3O2/c7-5-1-3-6-4-2-5;3-2(4,5)1(6)7/h5-7H,1-4H2;(H,6,7) \ No newline at end of file diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/carbohydrates.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/carbohydrates.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/carbohydrates.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/carbohydrates.txt 2017-07-23 20:55:18.000000000 +0000 @@ -69,7 +69,7 @@ 3-Deoxy-D-ribohexose InChI=1S/C6H12O5/c7-2-4(9)1-5(10)6(11)3-8/h2,4-6,8-11H,1,3H2/t4-,5+,6-/m1/s1 5-Deoxy-D-arabino-hept-3-ulose InChI=1S/C7H14O6/c8-2-4(10)1-5(11)7(13)6(12)3-9/h4-6,8-12H,1-3H2/t4-,5+,6+/m0/s1 6-Deoxy-L-gluco-oct-2-ulose InChI=1S/C8H16O7/c9-2-4(11)1-5(12)7(14)8(15)6(13)3-10/h4-5,7-12,14-15H,1-3H2/t4-,5+,7-,8-/m1/s1 -1 Deoxy-D-arabinitol InChI=1S/C5H12O4/c1-3(7)5(9)4(8)2-6/h3-9H,2H2,1H3/t3-,4-,5+/m1/s1 +1-Deoxy-D-arabinitol InChI=1S/C5H12O4/c1-3(7)5(9)4(8)2-6/h3-9H,2H2,1H3/t3-,4-,5+/m1/s1 5-Deoxy-D-arabinitol InChI=1S/C5H12O4/c1-3(7)5(9)4(8)2-6/h3-9H,2H2,1H3/t3-,4-,5-/m1/s1 5-Acetamido-3,5-dideoxy-D-glycero-alpha-D-galacto-non-2-ulopyranosonic acid InChI=1S/C11H19NO9/c1-4(14)12-7-5(15)2-11(20,10(18)19)21-9(7)8(17)6(16)3-13/h5-9,13,15-17,20H,2-3H2,1H3,(H,12,14)(H,18,19)/t5-,6+,7+,8+,9+,11+/m0/s1 N-acetyl-alpha-neuraminic acid InChI=1S/C11H19NO9/c1-4(14)12-7-5(15)2-11(20,10(18)19)21-9(7)8(17)6(16)3-13/h5-9,13,15-17,20H,2-3H2,1H3,(H,12,14)(H,18,19)/t5-,6+,7+,8+,9+,11+/m0/s1 @@ -189,23 +189,23 @@ N,N-Dimethyl-L-xylonamide InChI=1S/C7H15NO5/c1-8(2)7(13)6(12)5(11)4(10)3-9/h4-6,9-12H,3H2,1-2H3/t4-,5+,6-/m0/s1 Methyl 3-deoxy-D-threo-pentonate InChI=1S/C6H12O5/c1-11-6(10)5(9)2-4(8)3-7/h4-5,7-9H,2-3H2,1H3/t4-,5-/m0/s1 //Methyl tetra-O-acetyl-L-arabinonate 
InChI=1S/C14H20O10/c1-7(15)21-6-11(22-8(2)16)12(23-9(3)17)13(14(19)20-5)24-10(4)18/h11-13H,6H2,1-5H3/t11-,12-,13+/m0/s1 -//D-Glucono-1,4-lactone InChI=1S/C6H10O6/c7-1-2(8)5-3(9)4(10)6(11)12-5/h2-5,7-10H,1H2/t2-,3-,4-,5-/m1/s1 +D-Glucono-1,4-lactone InChI=1S/C6H10O6/c7-1-2(8)5-3(9)4(10)6(11)12-5/h2-5,7-10H,1H2/t2-,3-,4-,5-/m1/s1 //D-Gluconic acid gamma-lactone InChI=1S/C6H10O6/c7-1-2(8)5-3(9)4(10)6(11)12-5/h2-5,7-10H,1H2/t2-,3-,4-,5-/m1/s1 -//D-Glucono-1,5-lactone InChI=1S/C6H10O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-5,7-10H,1H2/t2-,3-,4+,5-/m1/s1 +D-Glucono-1,5-lactone InChI=1S/C6H10O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-5,7-10H,1H2/t2-,3-,4+,5-/m1/s1 //D-Gluconic acid delta-lactone InChI=1S/C6H10O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-5,7-10H,1H2/t2-,3-,4+,5-/m1/s1 -//3-Deoxy-D-ribo-hexono-1,5-lactone InChI=1S/C6H10O5/c7-2-5-3(8)1-4(9)6(10)11-5/h3-5,7-9H,1-2H2/t3-,4+,5+/m0/s1 -//5-Amino-5-deoxy-D-mannono-1,4-lactam InChI=1S/C6H12N2O5/c7-2(1-9)6(13)4(11)3(10)5(12)8-6/h2-4,9-11,13H,1,7H2,(H,8,12)/t2-,3+,4-,6-/m1/s1 +3-Deoxy-D-ribo-hexono-1,5-lactone InChI=1S/C6H10O5/c7-2-5-3(8)1-4(9)6(10)11-5/h3-5,7-9H,1-2H2/t3-,4+,5+/m0/s1 +//5-Amino-5-deoxy-D-mannono-1,5-lactam InChI=1S/C6H11NO5/c8-1-2-3(9)4(10)5(11)6(12)7-2/h2-5,8-11H,1H2,(H,7,12)/t2-,3-,4+,5+/m1/s1 //Penta-O-acetyl-D-gluconoyl chloride InChI=1S/C16H21ClO11/c1-7(18)24-6-12(25-8(2)19)13(26-9(3)20)14(27-10(4)21)15(16(17)23)28-11(5)22/h12-15H,6H2,1-5H3/t12-,13-,14+,15-/m1/s1 D-erythro-Pent-2-ulosonic acid InChI=1S/C5H8O6/c6-1-2(7)3(8)4(9)5(10)11/h2-3,6-8H,1H2,(H,10,11)/t2-,3-/m1/s1 D-arabino-Hex-5-ulosonic acid InChI=1S/C6H10O7/c7-1-2(8)3(9)4(10)5(11)6(12)13/h3-5,7,9-11H,1H2,(H,12,13)/t3-,4-,5+/m1/s1 alpha-D-arabino-Hex-2-ulopyranosonic acid InChI=1S/C6H10O7/c7-2-1-13-6(12,5(10)11)4(9)3(2)8/h2-4,7-9,12H,1H2,(H,10,11)/t2-,3-,4+,6-/m1/s1 3-Deoxy-alpha-D-manno-oct-2-ulopyranosonic acid InChI=1S/C8H14O8/c9-2-4(11)6-5(12)3(10)1-8(15,16-6)7(13)14/h3-6,9-12,15H,1-2H2,(H,13,14)/t3-,4-,5-,6-,8-/m1/s1 //Ethyl (methyl 
alpha-D-arabino-hex-2-u1opyranosid)onate -//beta-D-arabino-Hex-2-ulopyranosono-1,5-lactone InChI=1S/C6H8O6/c7-3-2-1-11-6(10,4(3)8)5(9)12-2/h2-4,7-8,10H,1H2/t2-,3-,4+,6+/m1/s1 +beta-D-arabino-Hex-2-ulopyranosono-1,5-lactone InChI=1S/C6H8O6/c7-3-2-1-11-6(10,4(3)8)5(9)12-2/h2-4,7-8,10H,1H2/t2-,3-,4+,6+/m1/s1 //Indol-3-yl D-xylo-hex-5-ulofuranosonate -//L-xylo-Hex-2-ulosono-1,4-lactone InChI=1S/C6H8O6/c7-1-2(8)5-3(9)4(10)6(11)12-5/h2-3,5,7-9H,1H2/t2-,3+,5+/m0/s1 -//L-threo-Hex-2-enono-1,4-lactone -//L-lyxo-Hex-2-ulosono-1,4-lactone InChI=1S/C6H8O6/c7-1-2(8)5-3(9)4(10)6(11)12-5/h2-3,5,7-9H,1H2/t2-,3-,5+/m0/s1 +L-xylo-Hex-2-ulosono-1,4-lactone InChI=1S/C6H8O6/c7-1-2(8)5-3(9)4(10)6(11)12-5/h2-3,5,7-9H,1H2/t2-,3+,5+/m0/s1 +L-threo-Hex-2-enono-1,4-lactone InChI=1S/C6H8O6/c7-1-2(8)5-3(9)4(10)6(11)12-5/h2,5,7-10H,1H2/t2-,5+/m0/s1 +L-lyxo-Hex-2-ulosono-1,4-lactone InChI=1S/C6H8O6/c7-1-2(8)5-3(9)4(10)6(11)12-5/h2-3,5,7-9H,1H2/t2-,3-,5+/m0/s1 D-Glucuronic acid InChI=1S/C6H10O7/c7-1-2(8)3(9)4(10)5(11)6(12)13/h1-5,8-11H,(H,12,13)/t2-,3+,4-,5-/m0/s1 alpha-D-Mannopyranuronic acid InChI=1S/C6H10O7/c7-1-2(8)4(5(10)11)13-6(12)3(1)9/h1-4,6-9,12H,(H,10,11)/t1-,2-,3-,4-,6-/m0/s1 Phenyl beta-D-glucopyranosiduronic acid InChI=1S/C12H14O7/c13-7-8(14)10(11(16)17)19-12(9(7)15)18-6-4-2-1-3-5-6/h1-5,7-10,12-15H,(H,16,17)/t7-,8-,9+,10-,12+/m0/s1 @@ -214,9 +214,9 @@ Methyl alpha-L-glucofuranosidurononitrile InChI=1S/C7H11NO5/c1-12-7-5(11)4(10)6(13-7)3(9)2-8/h3-7,9-11H,1H3/t3-,4-,5-,6-,7+/m0/s1 //Sodium (methyl alpha-L-g1ucofuranosid)uronate Ethyl 2,3,5-tri-O-benzoyl-alpha-D-mannofuranuronate InChI=1S/C29H26O10/c1-2-35-28(33)23(38-26(31)19-14-8-4-9-15-19)21-22(36-25(30)18-12-6-3-7-13-18)24(29(34)37-21)39-27(32)20-16-10-5-11-17-20/h3-17,21-24,29,34H,2H2,1H3/t21-,22-,23-,24-,29-/m0/s1 -//D-Glucurono-6,3-lactone InChI=1S/C6H8O6/c7-1-2(8)5-3(9)4(10)6(11)12-5/h1-5,8-10H/t2-,3+,4-,5+/m0/s1 -//D-Glucofuranurono-6,3-lactone InChI=1S/C6H8O6/c7-1-3-4(12-5(1)9)2(8)6(10)11-3/h1-5,7-9H/t1-,2+,3-,4-,5?/m1/s1 
-//Methyl alpha-D-glucofuranosidurono-6,3-lactone +D-Glucurono-6,3-lactone InChI=1S/C6H8O6/c7-1-2(8)5-3(9)4(10)6(11)12-5/h1-5,8-10H/t2-,3+,4-,5+/m0/s1 +D-Glucofuranurono-6,3-lactone InChI=1S/C6H8O6/c7-1-3-4(12-5(1)9)2(8)6(10)11-3/h1-5,7-9H/t1-,2+,3-,4-,5?/m1/s1 +Methyl alpha-D-glucofuranosidurono-6,3-lactone InChI=1S/C7H10O6/c1-11-7-3(9)5-4(13-7)2(8)6(10)12-5/h2-5,7-9H,1H3/t2-,3+,4+,5+,7-/m0/s1 4-Deoxy-L-threo-hex-4-enopyranuronic acid InChI=1S/C6H8O6/c7-2-1-3(5(9)10)12-6(11)4(2)8/h1-2,4,6-8,11H,(H,9,10)/t2-,4+,6?/m0/s1 Methyl 4-deoxy-L-threo-hex-4-enopyranuronate InChI=1S/C7H10O6/c1-12-6(10)4-2-3(8)5(9)7(11)13-4/h2-3,5,7-9,11H,1H3/t3-,5+,7?/m0/s1 Methyl 4-deoxy-alpha-L-threo-hex-4-enopyranosiduronic acid InChI=1S/C7H10O6/c1-12-7-5(9)3(8)2-4(13-7)6(10)11/h2-3,5,7-9H,1H3,(H,10,11)/t3-,5+,7+/m0/s1 @@ -228,5 +228,29 @@ //meso-Xylaric acid InChI=1S/C5H8O7/c6-1(2(7)4(9)10)3(8)5(11)12/h1-3,6-8H,(H,9,10)(H,11,12)/t1-,2-,3+ //meso-Galactaric acid InChI=1S/C6H10O8/c7-1(3(9)5(11)12)2(8)4(10)6(13)14/h1-4,7-10H,(H,11,12)(H,13,14)/t1-,2+,3+,4- 4-O-Methyl-D-galactaric acid InChI=1S/C7H12O8/c1-15-5(4(10)7(13)14)2(8)3(9)6(11)12/h2-5,8-10H,1H3,(H,11,12)(H,13,14)/t2-,3-,4+,5-/m1/s1 +//(2R,3R)-Tartaric acid InChI=1S/C4H6O6/c5-1(3(7)8)2(6)4(9)10/h1-2,5-6H,(H,7,8)(H,9,10)/t1-,2-/m1/s1 +//(+)-Tartaric acid InChI=1S/C4H6O6/c5-1(3(7)8)2(6)4(9)10/h1-2,5-6H,(H,7,8)(H,9,10)/t1-,2-/m1/s1 +L-threaric acid InChI=1S/C4H6O6/c5-1(3(7)8)2(6)4(9)10/h1-2,5-6H,(H,7,8)(H,9,10)/t1-,2-/m1/s1 +//(2S,3S)-Tartaric acid InChI=1S/C4H6O6/c5-1(3(7)8)2(6)4(9)10/h1-2,5-6H,(H,7,8)(H,9,10)/t1-,2-/m0/s1 +//(-)-Tartaric acid InChI=1S/C4H6O6/c5-1(3(7)8)2(6)4(9)10/h1-2,5-6H,(H,7,8)(H,9,10)/t1-,2-/m0/s1 +D-threaric acid InChI=1S/C4H6O6/c5-1(3(7)8)2(6)4(9)10/h1-2,5-6H,(H,7,8)(H,9,10)/t1-,2-/m0/s1 +//(2R,3S)-Tartaric acid InChI=1S/C4H6O6/c5-1(3(7)8)2(6)4(9)10/h1-2,5-6H,(H,7,8)(H,9,10)/t1-,2+ +//meso-Tartaric acid InChI=1S/C4H6O6/c5-1(3(7)8)2(6)4(9)10/h1-2,5-6H,(H,7,8)(H,9,10)/t1-,2+ +erythraric acid 
InChI=1S/C4H6O6/c5-1(3(7)8)2(6)4(9)10/h1-2,5-6H,(H,7,8)(H,9,10)/t1-,2+ +1-Methyl hydrogen D-galactarate InChI=1S/C7H12O8/c1-15-7(14)5(11)3(9)2(8)4(10)6(12)13/h2-5,8-11H,1H3,(H,12,13)/t2-,3+,4+,5-/m1/s1 +6-Methyl hydrogen D-galactarate InChI=1S/C7H12O8/c1-15-7(14)5(11)3(9)2(8)4(10)6(12)13/h2-5,8-11H,1H3,(H,12,13)/t2-,3+,4+,5-/m0/s1 +//D-Glucar-1-amic acid +//Methyl D-glucar-6-amate +L-mannaro-1,4:6,3-dilactone InChI=1S/C6H6O6/c7-1-3-4(12-5(1)9)2(8)6(10)11-3/h1-4,7-8H/t1-,2-,3+,4+/m1/s1 +//The above are names from 2-Carb-1 through 2-Carb-23 +//2-Carb-37.3 +alpha-D-Glucopyranosyl-(1->4)-[alpha-D-glucopyranosyl-(1->6)]-D-glucopyranose InChI=1S/C18H32O16/c19-1-4-7(21)9(23)13(27)17(32-4)30-3-6-15(11(25)12(26)16(29)31-6)34-18-14(28)10(24)8(22)5(2-20)33-18/h4-29H,1-3H2/t4-,5-,6-,7-,8-,9+,10+,11-,12-,13-,14-,15-,16?,17+,18-/m1/s1 +4,6-di-O-(alpha-D-glucopyranosyl)-D-glucopyranose InChI=1S/C18H32O16/c19-1-4-7(21)9(23)13(27)17(32-4)30-3-6-15(11(25)12(26)16(29)31-6)34-18-14(28)10(24)8(22)5(2-20)33-18/h4-29H,1-3H2/t4-,5-,6-,7-,8-,9+,10+,11-,12-,13-,14-,15-,16?,17+,18-/m1/s1 +(5-Acetamido-3,5-dideoxy-D-glycero-alpha-D-galacto-non-2-ulopyranosylonic acid)-(2->3)-beta-D-galactopyranosyl-(1->3)-[alpha-L-fucopyranosyl-(1->4)]-2-acetamido-2-deoxy-D-glucopyranose InChI=1S/C31H52N2O23/c1-8-17(41)20(44)21(45)28(50-8)53-23-14(7-36)51-27(47)16(33-10(3)38)25(23)54-29-22(46)26(19(43)13(6-35)52-29)56-31(30(48)49)4-11(39)15(32-9(2)37)24(55-31)18(42)12(40)5-34/h8,11-29,34-36,39-47H,4-7H2,1-3H3,(H,32,37)(H,33,38)(H,48,49)/t8-,11-,12+,13+,14+,15+,16+,17+,18+,19-,20+,21-,22+,23+,24+,25+,26-,27?,28-,29-,31-/m0/s1 +//5-N-acetyl-alpha-neuraminyl-(2->3)-beta-D-galactopyranosyl-(1->3)-[alpha-L-fucopyranosyl-(1->4)]-2-acetamido-2-deoxy-D-glucopyranose 
InChI=1S/C31H52N2O23/c1-8-17(41)20(44)21(45)28(50-8)53-23-14(7-36)51-27(47)16(33-10(3)38)25(23)54-29-22(46)26(19(43)13(6-35)52-29)56-31(30(48)49)4-11(39)15(32-9(2)37)24(55-31)18(42)12(40)5-34/h8,11-29,34-36,39-47H,4-7H2,1-3H3,(H,32,37)(H,33,38)(H,48,49)/t8-,11-,12+,13+,14+,15+,16+,17+,18+,19-,20+,21-,22+,23+,24+,25+,26-,27?,28-,29-,31-/m0/s1 +2-Acetamido-2-deoxy-alpha-D-galactopyranosyl-(1->3)-[alpha-L-fucopyranosyl-(1->2)]-D-galactopyranose InChI=1S/C20H35NO15/c1-5-10(25)14(29)15(30)20(32-5)36-17-16(12(27)8(4-23)33-18(17)31)35-19-9(21-6(2)24)13(28)11(26)7(3-22)34-19/h5,7-20,22-23,25-31H,3-4H2,1-2H3,(H,21,24)/t5-,7+,8+,9+,10+,11-,12-,13+,14+,15-,16-,17+,18?,19+,20-/m0/s1 //CAS -D-gluco-2-hept-ulose InChI=1S/C7H14O7/c8-1-3(10)5(12)7(14)6(13)4(11)2-9/h3,5-10,12-14H,1-2H2/t3-,5-,6+,7+/m1/s1 \ No newline at end of file +D-gluco-2-hept-ulose InChI=1S/C7H14O7/c8-1-3(10)5(12)7(14)6(13)4(11)2-9/h3,5-10,12-14H,1-2H2/t3-,5-,6+,7+/m1/s1 +//Misc +5-acetamido-3,5-dideoxy-D-glycero-alpha-D-galacto-non-2-ulopyranonosyl-(2->3)-beta-D-galactopyranose InChI=1S/C17H29NO14/c1-5(21)18-9-6(22)2-17(16(28)29,31-13(9)10(24)7(23)3-19)32-14-11(25)8(4-20)30-15(27)12(14)26/h6-15,19-20,22-27H,2-4H2,1H3,(H,18,21)(H,28,29)/t6-,7+,8+,9+,10+,11-,12+,13+,14-,15+,17-/m0/s1 +glucosyl(1->6)glucopyranose InChI=1S/C12H22O11/c13-1-3-5(14)8(17)10(19)12(23-3)21-2-4-6(15)7(16)9(18)11(20)22-4/h3-20H,1-2H2/t3-,4-,5-,6-,7+,8+,9-,10-,11?,12?/m1/s1 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/chargeBalancing.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/chargeBalancing.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/chargeBalancing.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/chargeBalancing.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,7 @@ +ammonium chloride InChI=1S/ClH.H3N/h1H;1H3 +sodium chloride InChI=1S/ClH.Na/h1H;/q;+1/p-1 
+magnesium chloride InChI=1S/2ClH.Mg/h2*1H;/q;;+2/p-2 +iron(3+) oxide InChI=1S/2Fe.3O/q2*+3;3*-2 +sodium citrate InChI=1S/C6H8O7.3Na/c7-3(8)1-6(13,5(11)12)2-4(9)10;;;/h13H,1-2H2,(H,7,8)(H,9,10)(H,11,12);;;/q;3*+1/p-3 +caffeine citrate InChI=1S/C8H10N4O2.C6H8O7/c1-10-4-9-6-5(10)7(13)12(3)8(14)11(6)2;7-3(8)1-6(13,5(11)12)2-4(9)10/h4H,1-3H3;13H,1-2H2,(H,7,8)(H,9,10)(H,11,12) +tetrabutylammonium tribromide InChI=1S/C16H36N.Br3/c1-5-9-13-17(14-10-6-2,15-11-7-3)16-12-8-4;1-3-2/h5-16H2,1-4H3;/q+1;-1 diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/conjunctiveNomenclature.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/conjunctiveNomenclature.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/conjunctiveNomenclature.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/conjunctiveNomenclature.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,5 @@ +benzeneacetic acid InChI=1S/C8H8O2/c9-8(10)6-7-4-2-1-3-5-7/h1-5H,6H2,(H,9,10) +benzeneethylamine InChI=1S/C8H11N/c9-7-6-8-4-2-1-3-5-8/h1-5H,6-7,9H2 +cyclohexaneacetic acid piperidide InChI=1S/C13H23NO/c15-13(14-9-5-2-6-10-14)11-12-7-3-1-4-8-12/h12H,1-11H2 +L-aspartic acid N,N-diacetic acid InChI=1S/C8H11NO8/c10-5(11)1-4(8(16)17)9(2-6(12)13)3-7(14)15/h4H,1-3H2,(H,10,11)(H,12,13)(H,14,15)(H,16,17)/t4-/m0/s1 +beta-alanine diacetic acid InChI=1S/C7H11NO6/c9-5(10)1-2-8(3-6(11)12)4-7(13)14/h1-4H2,(H,9,10)(H,11,12)(H,13,14) diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/epoxyLike.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/epoxyLike.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/epoxyLike.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/epoxyLike.txt 2017-07-23 20:55:18.000000000 +0000 @@ -5,4 +5,5 @@ 3,4-epoxybutanol InChI=1S/C4H8O2/c5-2-1-4-3-6-4/h4-5H,1-3H2 
2,3-epoxypyridine InChI=1S/C5H3NO/c1-2-4-5(7-4)6-3-1/h1-3H methylenedioxydibenzene InChI=1S/C13H12O2/c1-3-7-12(8-4-1)14-11-15-13-9-5-2-6-10-13/h1-10H,11H2 -3,4'-methylenedioxydipyridine InChI=1S/C11H10N2O2/c1-2-11(8-13-5-1)15-9-14-10-3-6-12-7-4-10/h1-8H,9H2 \ No newline at end of file +3,4'-methylenedioxydipyridine InChI=1S/C11H10N2O2/c1-2-11(8-13-5-1)15-9-14-10-3-6-12-7-4-10/h1-8H,9H2 +3,4-epoxy-1-phenyl-butane InChI=1S/C10H12O/c1-2-4-9(5-3-1)6-7-10-8-11-10/h1-5,10H,6-8H2 diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/functionalClasses.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/functionalClasses.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/functionalClasses.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/functionalClasses.txt 2017-07-23 20:55:18.000000000 +0000 @@ -2,4 +2,25 @@ N,N'-dimethyl-1,4-naphthoquinone diimine InChI=1S/C12H12N2/c1-13-11-7-8-12(14-2)10-6-4-3-5-9(10)11/h3-8H,1-2H3 acetic acid amide InChI=1S/C2H5NO/c1-2(3)4/h1H3,(H2,3,4) acetic acid N-methylamide InChI=1S/C3H7NO/c1-3(5)4-2/h1-2H3,(H,4,5) -acetic acid N-methyl amide InChI=1S/C3H7NO/c1-3(5)4-2/h1-2H3,(H,4,5) \ No newline at end of file +acetic acid N-methyl amide InChI=1S/C3H7NO/c1-3(5)4-2/h1-2H3,(H,4,5) +2-pyridylformaldehyde semicarbazone InChI=1/C7H8N4O/c8-7(12)11-10-5-6-3-1-2-4-9-6/h1-5H,(H3,8,11,12)/f/h11H,8H2 +ibuprofen methyl ester InChI=1S/C14H20O2/c1-10(2)9-12-5-7-13(8-6-12)11(3)14(15)16-4/h5-8,10-11H,9H2,1-4H3 +acetic acid ethyl ester InChI=1S/C4H8O2/c1-3-6-4(2)5/h3H2,1-2H3 +acetate ethyl ester InChI=1S/C4H8O2/c1-3-6-4(2)5/h3H2,1-2H3 +acetone ethyloxime InChI=1S/C5H11NO/c1-4-7-6-5(2)3/h4H2,1-3H3 +acetone O-ethyloxime InChI=1S/C5H11NO/c1-4-7-6-5(2)3/h4H2,1-3H3 +acetone O2-ethyloxime InChI=1S/C5H11NO/c1-4-7-6-5(2)3/h4H2,1-3H3 +diphosphoric acid 1,3-di(ethylamide) 
InChI=1/C4H14N2O5P2/c1-3-5-12(7,8)11-13(9,10)6-4-2/h3-4H2,1-2H3,(H2,5,7,8)(H2,6,9,10) +benzene-1,4-dicarboxylic acid chloride InChI=1S/C8H4Cl2O2/c9-7(11)5-1-2-6(4-3-5)8(10)12/h1-4H +ethylene glycol methacrylate phosphate InChI=1S/C6H11O6P/c1-5(2)6(7)11-3-4-12-13(8,9)10/h1,3-4H2,2H3,(H2,8,9,10) +Bisphenol A diglycidyl ether InChI=1/C21H24O4/c1-21(2,15-3-7-17(8-4-15)22-11-19-13-24-19)16-5-9-18(10-6-16)23-12-20-14-25-20/h3-10,19-20H,11-14H2,1-2H3 +1,4-butanediol diglycidyl ether InChI=1S/C10H18O4/c1(3-11-5-9-7-13-9)2-4-12-6-10-8-14-10/h9-10H,1-8H2 +ethylene glycol ethyl ether acetate InChI=1S/C6H12O3/c1-3-8-4-5-9-6(2)7/h3-5H2,1-2H3 +diethylene glycol ethyl methyl ether InChI=1S/C7H16O3/c1-3-9-6-7-10-5-4-8-2/h3-7H2,1-2H3 +glycerol monooleate InChI=1/C21H40O4/c1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-16-17-21(24)25-19-20(23)18-22/h9-10,20,22-23H,2-8,11-19H2,1H3/b10-9- +glycerol triglycidyl ether InChI=1S/C12H20O6/c1(13-3-10-5-16-10)9(15-7-12-8-18-12)2-14-4-11-6-17-11/h9-12H,1-8H2 +acetic acid ammonium salt InChI=1S/C2H4O2.H3N/c1-2(3)4;/h1H3,(H,3,4);1H3 +acetic acid ammonia salt InChI=1S/C2H4O2.H3N/c1-2(3)4;/h1H3,(H,3,4);1H3 +acetic acid sodium salt InChI=1S/C2H4O2.Na/c1-2(3)4;/h1H3,(H,3,4);/q;+1/p-1 +acetic acid sodium(0) salt InChI=1S/C2H4O2.Na/c1-2(3)4;/h1H3,(H,3,4); +pyridine acetic acid salt InChI=1S/C5H5N.C2H4O2/c1-2-4-6-5-3-1;1-2(3)4/h1-5H;1H3,(H,3,4) diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/functionalReplacement.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/functionalReplacement.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/functionalReplacement.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/functionalReplacement.txt 2017-07-23 20:55:18.000000000 +0000 @@ -17,4 +17,5 @@ 2-chloro-2-thiooxalic acid InChI=1S/C2HClO2S/c3-1(6)2(4)5/h(H,4,5) cyanooxalic acid InChI=1S/C3HNO3/c4-1-2(5)3(6)7/h(H,6,7) 
5-carbonoperoxoylpentanoic acid InChI=1S/C6H10O5/c7-5(8)3-1-2-4-6(9)11-10/h10H,1-4H2,(H,7,8) -phosphoroperoxoyldibenzene InChI=1S/C12H11O3P/c13-15-16(14,11-7-3-1-4-8-11)12-9-5-2-6-10-12/h1-10,13H \ No newline at end of file +phosphoroperoxoyldibenzene InChI=1S/C12H11O3P/c13-15-16(14,11-7-3-1-4-8-11)12-9-5-2-6-10-12/h1-10,13H +phosphoric methyl amide ethyl amide propyl amide InChI=1S/C6H18N3OP/c1-4-6-9-11(10,7-3)8-5-2/h4-6H2,1-3H3,(H3,7,8,9,10) \ No newline at end of file diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/fusedRings.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/fusedRings.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/fusedRings.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/fusedRings.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1 +1,13 @@ cyclopenta[1,2-b:5,1-b']bis[1,4]oxathiine InChI=1S/C9H6O2S2/c1-2-8-9(11-4-5-12-8)7(1)10-3-6-13-9/h1-6H +4a,8a-propanoquinoline InChI=1S/C12H13N/c1-2-8-12-9-3-6-11(12,5-1)7-4-10-13-12/h1-2,4-5,7-8,10H,3,6,9H2 +1,5-methanoindole InChI=1S/C9H7N/c1-2-9-8-3-4-10(9)6-7(1)5-8/h1-5H,6H2 +1,5-methano-1H-indole InChI=1S/C9H7N/c1-2-9-8-3-4-10(9)6-7(1)5-8/h1-5H,6H2 +9H-9,10-ethanoacridine InChI=1S/C15H13N/c1-3-7-14-12(5-1)11-9-10-16(14)15-8-4-2-6-13(11)15/h1-8,11H,9-10H2 +1,3-epoxynaphthalene InChI=1S/C10H6O/c1-2-4-9-7(3-1)5-8-6-10(9)11-8/h1-6H +1,12-ethenobenzo[4,5]cyclohepta[1,2,3-de]naphthalene InChI=1S/C20H12/c1-3-13-7-8-14-4-2-6-16-10-12-17-11-9-15(5-1)18(13)20(17)19(14)16/h1-12H +2,6:5,7-dimethanoindeno[7,1-bc]furan InChI=1S/C12H6O/c1-2-6-10-4-8-7-3-9(8)12(13-10)11(6)5(1)7/h1-2H,3-4H2 +1,2,3,4-tetrahydro-1,4-ethenoanthracen-2-ol InChI=1S/C16H14O/c17-16-9-12-5-6-13(16)15-8-11-4-2-1-3-10(11)7-14(12)15/h1-8,12-13,16-17H,9H2 +1,4:5,8-dimethanonaphthalene InChI=1S/C12H8/c1-2-8-5-7(1)11-9-3-4-10(6-9)12(8)11/h1-4H,5-6H2 +6,14:7,14-dimethanobenzo[7,8]cycloundeca[1,2-b]pyridine 
InChI=1S/C20H15N/c1-2-5-16-10-20-11-17(8-14(16)4-1)18(12-20)9-15-6-3-7-21-19(15)13-20/h1-10,13H,11-12H2 +6,13-ethano-6,13-methanodibenzo[b,g][1,6]diazecine InChI=1S/C19H16N2/c1-3-7-16-14(5-1)11-18-9-10-19(13-18,20-16)12-15-6-2-4-8-17(15)21-18/h1-8,11-12H,9-10,13H2 +(2S)-2-[(5R,6R,7R,14S)-N-cyclopropylmethyl-4,5-epoxy-6,14-ethano-3-hydroxy-6-methoxymorphinan-7-yl]-3,3-dimethylpentan-2-ol InChI=1S/C30H43NO4/c1-6-26(2,3)27(4,33)21-16-28-11-12-30(21,34-5)25-29(28)13-14-31(17-18-7-8-18)22(28)15-19-9-10-20(32)24(35-25)23(19)29/h9-10,18,21-22,25,32-33H,6-8,11-17H2,1-5H3/t21-,22-,25-,27+,28-,29+,30-/m1/s1 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/implicitBracketting.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/implicitBracketting.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/implicitBracketting.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/implicitBracketting.txt 2017-07-23 20:55:18.000000000 +0000 @@ -5,4 +5,18 @@ 1,4-dimethoxycarbonyl-benzene InChI=1S/C10H10O4/c1-13-9(11)7-3-5-8(6-4-7)10(12)14-2/h3-6H,1-2H3 1,5-bis-(4-methylphenyl)sulfonylbenzene InChI=1/C20H18O4S2/c1-15-6-10-17(11-7-15)25(21,22)19-4-3-5-20(14-19)26(23,24)18-12-8-16(2)9-13-18/h3-14H,1-2H3 S-fluoromethyl methanethioate InChI=1S/C2H3FOS/c3-1-5-2-4/h2H,1H2 -2-pentafluoroethylpropanamine InChI=1S/C5H8F5N/c1-3(2-11)4(6,7)5(8,9)10/h3H,2,11H2,1H3 \ No newline at end of file +2-pentafluoroethylpropanamine InChI=1S/C5H8F5N/c1-3(2-11)4(6,7)5(8,9)10/h3H,2,11H2,1H3 +p-dimethylaminopyridine InChI=1S/C7H10N2/c1-9(2)7-3-5-8-6-4-7/h3-6H,1-2H3 +3-methanesulfonylmethyl-phenylamine InChI=1S/C8H11NO2S/c1-12(10,11)6-7-3-2-4-8(9)5-7/h2-5H,6,9H2,1H3 +tert-butyldimethylsilyloxycyclohexane InChI=1S/C12H26OSi/c1-12(2,3)14(4,5)13-11-9-7-6-8-10-11/h11H,6-10H2,1-5H3 +tert-butyldimethylsiloxycyclohexane 
InChI=1S/C12H26OSi/c1-12(2,3)14(4,5)13-11-9-7-6-8-10-11/h11H,6-10H2,1-5H3 +tert-butyldimethylsilanoxycyclohexane InChI=1S/C12H26OSi/c1-12(2,3)14(4,5)13-11-9-7-6-8-10-11/h11H,6-10H2,1-5H3 +tert-butyl(dimethyl)siloxycyclohexane InChI=1S/C12H26OSi/c1-12(2,3)14(4,5)13-11-9-7-6-8-10-11/h11H,6-10H2,1-5H3 +tert-butyl-dimethylsiloxycyclohexane InChI=1S/C12H26OSi/c1-12(2,3)14(4,5)13-11-9-7-6-8-10-11/h11H,6-10H2,1-5H3 +tert-butyldiphenylsiloxycyclohexane InChI=1S/C22H30OSi/c1-22(2,3)24(20-15-9-5-10-16-20,21-17-11-6-12-18-21)23-19-13-7-4-8-14-19/h5-6,9-12,15-19H,4,7-8,13-14H2,1-3H3 +pyridine ditrifluoroacetate InChI=1S/C5H5N.2C2HF3O2/c1-2-4-6-5-3-1;2*3-2(4,5)1(6)7/h1-5H;2*(H,6,7) +bis-tetrabutylammonium phosphate InChI=1S/2C16H36N.H3O4P/c2*1-5-9-13-17(14-10-6-2,15-11-7-3)16-12-8-4;1-5(2,3)4/h2*5-16H2,1-4H3;(H3,1,2,3,4)/q2*+1;/p-2 +4-Tert-butoxy-carbonyl-piperazine InChI=1S/C9H18N2O2/c1-9(2,3)13-8(12)11-6-4-10-5-7-11/h10H,4-7H2,1-3H3 +4,4'-propylmethylenedianiline InChI=1S/C16H20N2/c1-2-3-16(12-4-8-14(17)9-5-12)13-6-10-15(18)11-7-13/h4-11,16H,2-3,17-18H2,1H3 +//Counter example +Diisopropylazodicarboxylate InChI=1S/C8H14N2O4/c1-5(2)13-7(11)9-10-8(12)14-6(3)4/h5-6H,1-4H3 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/inorganics.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/inorganics.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/inorganics.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/inorganics.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,11 @@ +//mostly ionic +sodium chloride InChI=1S/ClH.Na/h1H;/q;+1/p-1 +sodium oxide InChI=1S/2Na.O/q2*+1;-2 +sodium dioxide InChI=1S/2Na.O2/c;;1-2/q2*+1;-2 +barium sulfide InChI=1S/Ba.S/q+2;-2 +barium disulfide InChI=1S/Ba.S2/c;1-2/q+2;-2 +iron(III) oxide InChI=1S/2Fe.3O/q2*+3;3*-2 +//mostly covalent +boron trifluoride InChI=1S/BF3/c2-1(3)4 +silicon dioxide 
InChI=1S/O2Si/c1-3-2 +carbon tetrachloride InChI=1S/CCl4/c2-1(3,4)5 diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/isotopes.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/isotopes.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/isotopes.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/isotopes.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,114 @@ +//From CAS recommendations +Methane-d InChI=1S/CH4/h1H4/i1D +Methane-d4 InChI=1S/CH4/h1H4/i1D4 +2,2,2-trifluoro-Ethane-d InChI=1S/C2H3F3/c1-2(3,4)5/h1H3/i1D +2-chloro-6-(methyl-d3)-Benzene-d InChI=1S/C7H7Cl/c1-6-3-2-4-7(8)5-6/h2-5H,1H3/i1D3,5D +methyl-Phosphine-d2 InChI=1S/CH5P/c1-2/h2H2,1H3/i2D2 +Urea-N,N,N',N'-d4 InChI=1S/CH4N2O/c2-1(3)4/h(H4,2,3,4)/i/hD4 +Hydroxyl-d-amine-d2 InChI=1S/H3NO/c1-2/h2H,1H2/i1D2,2D +N-(methyl-d3)-Methan-d3-amine InChI=1S/C2H7N/c1-3-2/h3H,1-2H3/i1D3,2D3 +Silanamine-d2 InChI=1S/H5NSi/c1-2/h1H2,2H3/i1D2 +Hydroxyl-d-amine-d InChI=1S/H3NO/c1-2/h2H,1H2/i1D,2D +Ethan-2-d-amine InChI=1S/C2H7N/c1-2-3/h2-3H2,1H3/i1D +Methanol-d InChI=1S/CH4O/c1-2/h2H,1H3/i2D +Methan-d-ol 1-methanesulfonate InChI=1S/C2H6O3S/c1-5-6(2,3)4/h1-2H3/i1D +//Benzene-4-d-methane-alpha,alpha-d2-thiol InChI=1S/C7H8S/c8-6-7-4-2-1-3-5-7/h1-5,8H,6H2/i1D,6D2 +//Alanine-N,N,1-d3 InChI=1S/C3H7NO2/c1-2(4)3(5)6/h2H,4H2,1H3,(H,5,6)/t2-/m0/s1/i/hD3 +1H-Imidazole-1-d-2-carboxylic acid-d InChI=1S/C4H4N2O2/c7-4(8)3-5-1-2-6-3/h1-2H,(H,5,6)(H,7,8)/i/hD2 +//1,2-Ethane-1,1,2,2-d4-diol-1,2-d2 InChI=1S/C2H6O2/c3-1-2-4/h3-4H,1-2H2/i1D2,2D2,3D,4D +Acetaldehyde-1,2-d2 InChI=1S/C2H4O/c1-2-3/h2H,1H3/i1D,2D +Propanamide-N,3-d2 InChI=1S/C3H7NO/c1-2-3(4)5/h2H2,1H3,(H2,4,5)/i1D/hD +1-(ethyl-2,2,2-d3)-4-(methyl-d3)-Benzene InChI=1S/C9H12/c1-3-9-6-4-8(2)5-7-9/h4-7H,3H2,1-2H3/i1D3,2D3 +N-(2-piperidinyl-1-d)-Carbamic acid InChI=1S/C6H12N2O2/c9-6(10)8-5-3-1-2-4-7-5/h5,7-8H,1-4H2,(H,9,10)/i/hD 
+2-Propanone-1,1,1,3,3,3-d6 InChI=1S/C3H6O/c1-3(2)4/h1-2H3/i1D3,2D3 +//From IUPAC +(14C)methane InChI=1S/CH4/h1H4/i1+2 +trichloro(12C)methane InChI=1S/CHCl3/c2-1(3)4/h1H/i1+0 +(12C)chloroform InChI=1S/CHCl3/c2-1(3)4/h1H/i1+0 +(²H1)methane InChI=1S/CH4/h1H4/i1D +dichloro(²H2)methane InChI=1S/CH2Cl2/c2-1-3/h1H2/i1D2 +(2H3)methoxybenzene InChI=1S/C7H8O/c1-8-7-5-3-2-4-6-7/h2-6H,1H3/i1D3 +(α,α,α-2H3)anisole InChI=1S/C7H8O/c1-8-7-5-3-2-4-6-7/h2-6H,1H3/i1D3 +1-phenyl(1,2-13C2)ethanone InChI=1S/C8H8O/c1-7(9)8-5-3-2-4-6-8/h2-6H,1H3/i1+1,7+1 +(1,2-13C2)acetophenone InChI=1S/C8H8O/c1-7(9)8-5-3-2-4-6-8/h2-6H,1H3/i1+1,7+1 +(1,2-13C)acetophenone InChI=1S/C8H8O/c1-7(9)8-5-3-2-4-6-8/h2-6H,1H3/i1+1,7+1 +1,2-di[(13C)methyl]benzene InChI=1S/C8H10/c1-7-5-3-4-6-8(7)2/h3-6H,1-2H3/i1+1,2+1 +(α,α′-13C2)-1,2-xylene InChI=1S/C8H10/c1-7-5-3-4-6-8(7)2/h3-6H,1-2H3/i1+1,2+1 +2-(13C)methyl-(1-13C)benzene InChI=1S/C7H8/c1-7-5-3-2-4-6-7/h2-6H,1H3/i1+1,5+1 +(2-2H1)ethan-1-ol InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3/i1D +(2-13C)ethan-1-ol InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3/i1+1 +1-[amino(14C)methyl]cyclopentan-1-ol InChI=1S/C6H13NO/c7-5-6(8)3-1-2-4-6/h8H,1-5,7H2/i5+2 +1-(aminomethyl)cyclopentan-1-(18O)ol InChI=1S/C6H13NO/c7-5-6(8)3-1-2-4-6/h8H,1-5,7H2/i8+2 +N-[7-(131I)iodo-9H-fluoren-2-yl]acetamide InChI=1S/C15H12INO/c1-9(18)17-13-3-5-15-11(8-13)6-10-7-12(16)2-4-14(10)15/h2-5,7-8H,6H2,1H3,(H,17,18)/i16+4 +sodium 4-ethoxy-4-oxo(2,3-14C2)butanoate InChI=1S/C6H10O4.Na/c1-2-10-6(9)4-3-5(7)8;/h2-4H2,1H3,(H,7,8);/q;+1/p-1/i3+2,4+2; +sodium ethyl (2,3-14C2)butanedioate InChI=1S/C6H10O4.Na/c1-2-10-6(9)4-3-5(7)8;/h2-4H2,1H3,(H,7,8);/q;+1/p-1/i3+2,4+2; +sodium ethyl (2,3-14C2)succinate InChI=1S/C6H10O4.Na/c1-2-10-6(9)4-3-5(7)8;/h2-4H2,1H3,(H,7,8);/q;+1/p-1/i3+2,4+2; +4-[(3-14C)thiolan-2-yl]pyridine InChI=1S/C9H11NS/c1-2-9(11-7-1)8-3-5-10-6-4-8/h3-6,9H,1-2,7H2/i2+2 +4-[tetrahydro(3-14C)thiophen-2-yl]pyridine InChI=1S/C9H11NS/c1-2-9(11-7-1)8-3-5-10-6-4-8/h3-6,9H,1-2,7H2/i2+2 
+2-(35Cl)chloro-3-[(²H3)methyl](1-²H1)pentane InChI=1S/C6H13Cl/c1-4-5(2)6(3)7/h5-6H,4H2,1-3H3/i2D3,3D,7+0 +2-(13C)methyl-3-methylpyridine InChI=1S/C7H9N/c1-6-4-3-5-8-7(6)2/h3-5H,1-2H3/i2+1 +2-(2,2-2H2)ethyl-3-ethylhexan-1-ol InChI=1S/C10H22O/c1-4-7-9(5-2)10(6-3)8-11/h9-11H,4-8H2,1-3H3/i3D2 +//cyclohexane-1,1-di[(14C)-carboxylic acid] InChI=1S/C8H12O4/c9-6(10)8(7(11)12)4-2-1-3-5-8/h1-5H2,(H,9,10)(H,11,12)/i6+2,7+2 +//1-carboxycyclohexane-1-(13C,2H)carboxylic acid InChI=1S/C8H12O4/c9-6(10)8(7(11)12)4-2-1-3-5-8/h1-5H2,(H,9,10)(H,11,12)/i6+1/hD +//1-(2H)carboxycyclohexane-1-(13C)carboxylic acid InChI=1S/C8H12O4/c9-6(10)8(7(11)12)4-2-1-3-5-8/h1-5H2,(H,9,10)(H,11,12)/i6+1/hD +1-(13C)carboxycyclohexane-1-(14C)carboxylic acid InChI=1S/C8H12O4/c9-6(10)8(7(11)12)4-2-1-3-5-8/h1-5H2,(H,9,10)(H,11,12)/i6+1,7+2 +(1-15N)-1H-indole InChI=1S/C8H7N/c1-2-4-8-7(3-1)5-6-9-8/h1-6,9H/i9+1 +2,3-dihydro(1-15N)-1H-indole InChI=1S/C8H9N/c1-2-4-8-7(3-1)5-6-9-8/h1-4,9H,5-6H2/i9+1 +2,3-dihydro(2,3-2H2,1-15N)-1H-indole InChI=1S/C8H9N/c1-2-4-8-7(3-1)5-6-9-8/h1-4,9H,5-6H2/i5D,6D,9+1 +//2,3-di[(2H)hydro]-(2,3-2H2,15N)-1H-indole +//6-methyl-2,3-di[(2H2)dihydro](2,3-2H1)napthalen-1-ol +(2-²H1)acetic acid InChI=1S/C2H4O2/c1-2(3)4/h1H3,(H,3,4)/i1D +//acetic (²H)acid InChI=1S/C2H4O2/c1-2(3)4/h1H3,(H,3,4)/i1D +(O-²H)acetic acid InChI=1S/C2H4O2/c1-2(3)4/h1H3,(H,3,4)/i/hD +//(O-2H,18O)acetic acid +//(18O-2H)acetic acid +//(1-14C)pentan(³H)oic acid InChI=1S/C5H10O2/c1-2-3-4-5(6)7/h2-4H2,1H3,(H,6,7)/i5+2/hT +sodium (14C)formate InChI=1S/CH2O2.Na/c2-1-3;/h1H,(H,2,3);/q;+1/p-1/i1+2; +//cyclohexane(²H)carboxylic acid InChI=1S/C7H12O2/c8-7(9)6-4-2-1-3-5-6/h6H,1-5H2,(H,8,9)/i/hD +4-[(2-14C)ethyl]benzoic acid InChI=1S/C9H10O2/c1-2-7-3-5-8(6-4-7)9(10)11/h3-6H,2H2,1H3,(H,10,11)/i1+2 +(1-14C)ethyl propanoate InChI=1S/C5H10O2/c1-3-5(6)7-4-2/h3-4H2,1-2H3/i4+2 +ethyl (2-14C)propanoate InChI=1S/C5H10O2/c1-3-5(6)7-4-2/h3-4H2,1-2H3/i3+2 +(N-²H)acetamide InChI=1S/C2H5NO/c1-2(3)4/h1H3,(H2,3,4)/i/hD +//acet(²H)amide 
InChI=1S/C2H5NO/c1-2(3)4/h1H3,(H2,3,4)/i/hD +(N,N-2H2)aniline InChI=1S/C6H7N/c7-6-4-2-1-3-5-6/h1-5H,7H2/i/hD2 +(N,N-2H2)benzenamine InChI=1S/C6H7N/c7-6-4-2-1-3-5-6/h1-5H,7H2/i/hD2 +//methan(²H,18O)ol InChI=1S/CH4O/c1-2/h2H,1H3/i2+2D +(2-²H1,1-³H1)ethan-1-ol InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3/i1D,2T +(1R)-(1-²H1)ethan-1-ol InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3/i2D/t2-/m1/s1 +(1E)-(1-²H1)prop-1-ene InChI=1S/C3H6/c1-3-2/h3H,1H2,2H3/i1D/b3-1+ +(24R)-5alpha-(24-2H1)cholestane InChI=1S/C27H48/c1-19(2)9-8-10-20(3)23-14-15-24-22-13-12-21-11-6-7-17-26(21,4)25(22)16-18-27(23,24)5/h19-25H,6-18H2,1-5H3/t20-,21-,22+,23-,24+,25+,26+,27-/m1/s1/i9D/t9-,20-,21-,22+,23-,24+,25+,26+,27- +5alpha-(17-²H)pregnane InChI=1S/C21H36/c1-4-15-9-11-18-17-10-8-16-7-5-6-13-20(16,2)19(17)12-14-21(15,18)3/h15-19H,4-14H2,1-3H3/t15-,16+,17-,18-,19-,20-,21+/m0/s1/i15D +//L-(4-13C,35S)methionine +2-(18F) fluoro-2-deoxy-β-D-glucopyranose InChI=1S/C6H11FO5/c7-3-5(10)4(9)2(1-8)12-6(3)11/h2-6,8-11H,1H2/t2-,3-,4-,5-,6-/m1/s1/i7-1 +(2S)-(2-²H)butan-2-ol InChI=1S/C4H10O/c1-3-4(2)5/h4-5H,3H2,1-2H3/t4-/m0/s1/i4D +(2E)-1-chloro(2-²H)but-2-ene InChI=1S/C4H7Cl/c1-2-3-4-5/h2-3H,4H2,1H3/b3-2+/i3D +(2R,3R)-3-chloro(2-²H1)butan-2-ol InChI=1S/C4H9ClO/c1-3(5)4(2)6/h3-4,6H,1-2H3/t3-,4-/m1/s1/i4D +1,1,1-trifluoro(2-²H1)ethane InChI=1S/C2H3F3/c1-2(3,4)5/h1H3/i1D +1-chloro-3-fluoro(2-²H)benzene InChI=1S/C6H4ClF/c7-5-2-1-3-6(8)4-5/h1-4H/i4D +2-methoxy(3,4,5,6-³H4)phenol InChI=1S/C7H8O2/c1-9-7-5-3-2-4-6(7)8/h2-5,8H,1H3/i2T,3T,4T,5T +(2-14C)butane InChI=1S/C4H10/c1-3-4-2/h3-4H2,1-2H3/i3+2 +(3-14C,2,2-²H2)butane InChI=1S/C4H10/c1-3-4-2/h3-4H2,1-2H3/i3D2,4+2 +(2-14C,3-²H1)butane InChI=1S/C4H10/c1-3-4-2/h3-4H2,1-2H3/i3D,4+2 +(3-³H)phenol InChI=1S/C6H6O/c7-6-4-2-1-3-5-6/h1-5,7H/i2T +(2R)-(1-²H1)propan-2-ol InChI=1S/C3H8O/c1-3(2)4/h3-4H,1-2H3/i1D/t3-/m0/s1 +(2R)-1-(131I)iodo-3-iodopropan-2-ol InChI=1S/C3H6I2O/c4-1-3(6)2-5/h3,6H,1-2H2/i4+4/t3-/m0/s1 +(2S,4R)-(4-²H1,2-³H1)pentane InChI=1S/C5H12/c1-3-5-4-2/h3-5H2,1-2H3/i3D,4T/t3-,4+/m1/s1 
+(²H3)acetonitrile InChI=1S/C2H3N/c1-2-3/h1H3/i1D3 +//ethan(²H)ol InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3/i3D +(2-13C)ethan-1-ol InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3/i1+1 +{[(²H1)methoxy(²H2)methyl]sulfanyl}methaneperoxol InChI=1S/C3H8O3S/c1-5-2-7-3-6-4/h4H,2-3H2,1H3/i1D,2D2 +(2,3-2H2,15N)pyridine InChI=1S/C5H5N/c1-2-4-6-5-3-1/h1-5H/i2D,4D,6+1 +(2H6)benzene InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H/i1D,2D,3D,4D,5D,6D +2-(79Br)bromo-(1-13C)benzene InChI=1S/C6H5Br/c7-6-4-2-1-3-5-6/h1-5H/i4+1,7-1 +//(1-²H1)ethan-1-(2H)ol InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3/i2D,3D +(1,1,1,3,3-²H5)pentan-2-one InChI=1S/C5H10O/c1-3-4-5(2)6/h3-4H2,1-2H3/i2D3,4D2 +(2R)-2(O-2H)hydroxy-3-hydroxy(1-2H)propanal InChI=1S/C3H6O3/c4-1-3(6)2-5/h1,3,5-6H,2H2/t3-/m0/s1/i1D,6D +//D-(2-O,1-²H2)glyceraldehyde InChI=1S/C3H6O3/c4-1-3(6)2-5/h1,3,5-6H,2H2/t3-/m0/s1/i1D,6D +//D-(O-²H)glycer(2H)aldehyde InChI=1S/C3H6O3/c4-1-3(6)2-5/h1,3,5-6H,2H2/t3-/m0/s1/i1D,6D +//DL-[methyl-(14C,2H3)]methionine +//L-(carbamimidoyl-14C,N′-15N)arginine +//L-(α-2H)-phenylalanine InChI=1S/C9H11NO2/c10-8(9(11)12)6-7-4-2-1-3-5-7/h1-5,8H,6,10H2,(H,11,12)/t8-/m0/s1/i8D +1-(naphthalen-2-yl)-2-phenyl(1-15N)diazene InChI=1S/C16H12N2/c1-2-8-15(9-3-1)17-18-16-11-10-13-6-4-5-7-14(13)12-16/h1-12H/i18+1 +1-propylidene(1-15N)diazane InChI=1S/C3H8N2/c1-2-3-5-4/h3H,2,4H2,1H3/i5+1 +3-[ethyl(2-34S)trisulfan-1-yl]propanoic acid InChI=1S/C5H10O2S3/c1-2-8-10-9-4-3-5(6)7/h2-4H2,1H3,(H,6,7)/i10+2 +1-(1-chloronaphthalen-2-yl)-2-phenyl(1-15N)diazene 2-oxide InChI=1S/C16H11ClN2O/c17-16-14-9-5-4-6-12(14)10-11-15(16)18-19(20)13-7-2-1-3-8-13/h1-11H/i18+1 diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/miscellany.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/miscellany.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/miscellany.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/miscellany.txt 2017-07-23 20:55:18.000000000 +0000 @@ -5,7 
+5,6 @@ 3,3'-methylenebis(2,4,6-trimethylbenzaldehyde) disemicarbazone InChI=1S/C23H30N6O2/c1-12-7-14(3)20(10-26-28-22(24)30)16(5)18(12)9-19-13(2)8-15(4)21(17(19)6)11-27-29-23(25)31/h7-8,10-11H,9H2,1-6H3,(H3,24,28,30)(H3,25,29,31) 2-[1-(3,4-dihydro-2(1H)-isoquinolinylacetyl)-3-oxo-2-piperazinyl]-N-phenylacetamide InChI=1S/C23H26N4O3/c28-21(25-19-8-2-1-3-9-19)14-20-23(30)24-11-13-27(20)22(29)16-26-12-10-17-6-4-5-7-18(17)15-26/h1-9,20H,10-16H2,(H,24,30)(H,25,28) alpha-ethylfuran-2-methanol InChI=1/C7H10O2/c1-2-6(8)7-4-3-5-9-7/h3-6,8H,2H2,1H3 -2-pyridylformaldehyde semicarbazone InChI=1/C7H8N4O/c8-7(12)11-10-5-6-3-1-2-4-9-6/h1-5H,(H3,8,11,12)/f/h11H,8H2 (S)-N-{5-(4-Fluoro-phenyl)-4-[1-(4-fluoro-phenyl)-1H-indol-4-ylmethyl]-3-oxo-3,4-dihydro-pyrazin-2-yl}-2-methylamino-propionamide InChI=1S/C29H25F2N5O2/c1-18(32-2)28(37)34-27-29(38)36(26(16-33-27)19-6-8-21(30)9-7-19)17-20-4-3-5-25-24(20)14-15-35(25)23-12-10-22(31)11-13-23/h3-16,18,32H,17H2,1-2H3,(H,33,34,37)/t18-/m0/s1 (S)-N-{5-(4-Fluoro-phenyl)-4-[1-(4-fluoro-phenyl)-(1H-indol-4-yl)methyl]-3-oxo-3,4-dihydro-pyrazin-2-yl}-2-methylamino-propionamide InChI=1S/C29H25F2N5O2/c1-17(32-2)28(37)35-27-29(38)36(25(16-34-27)18-6-10-20(30)11-7-18)26(19-8-12-21(31)13-9-19)23-4-3-5-24-22(23)14-15-33-24/h3-17,26,32-33H,1-2H3,(H,34,35,37)/t17-,26?/m0/s1 1-(1-phenylcyclopentyl)methylamine InChI=1S/C12H17N/c13-10-12(8-4-5-9-12)11-6-2-1-3-7-11/h1-3,6-7H,4-5,8-10,13H2 @@ -16,4 +15,24 @@ hexachlorophosphate InChI=1S/Cl6P/c1-7(2,3,4,5)6/q-1 Hexafluorosilicic acid InChI=1S/F6Si/c1-7(2,3,4,5)6/q-2/p+2 Hexafluorophosphoric acid InChI=1S/F6P/c1-7(2,3,4,5)6/q-1/p+1 -Hexafluorophosphoric acid triamide InChI=1S/F6N3OP/c1-7(2)11(10,8(3)4)9(5)6 \ No newline at end of file +Hexafluorophosphoric acid triamide InChI=1S/F6N3OP/c1-7(2)11(10,8(3)4)9(5)6 +phenyltrifluoroborate InChI=1S/C6H5BF3/c8-7(9,10)6-4-2-1-3-5-6/h1-5H/q-1 +ethylnitrolic acid InChI=1S/C2H4N2O3/c1-2(3-5)4(6)7/h5H,1H3 +//Formally ambiguous +pentachlorobenzyl acetate 
InChI=1S/C9H5Cl5O2/c1-3(15)16-2-4-5(10)7(12)9(14)8(13)6(4)11/h2H2,1H3 +//Formally ambiguous +tetraphenylporphyrin InChI=1S/C44H30N4/c1-5-13-29(14-6-1)41-33-21-23-35(45-33)42(30-15-7-2-8-16-30)37-25-27-39(47-37)44(32-19-11-4-12-20-32)40-28-26-38(48-40)43(31-17-9-3-10-18-31)36-24-22-34(41)46-36/h1-28,45,48H +//Formally ambiguous +hexachlorocyclohexane InChI=1S/C6H6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9/h1-6H +pyridinium hemisulfate InChI=1S/2C5H5N.H2O4S/c2*1-2-4-6-5-3-1;1-5(2,3)4/h2*1-5H;(H2,1,2,3,4) +potassium carbonate sesquihydrate InChI=1S/2CH2O3.4K.3H2O/c2*2-1(3)4;;;;;;;/h2*(H2,2,3,4);;;;;3*1H2/q;;4*+1;;;/p-4 +S-methylmethionine InChI=1S/C6H13NO2S/c1-10(2)4-3-5(7)6(8)9/h5H,3-4,7H2,1-2H3/p+1/t5-/m0/s1 +Se-benzyl-seleno-methionine InChI=1S/C12H17NO2Se/c1-16(8-7-11(13)12(14)15)9-10-5-3-2-4-6-10/h2-6,11H,7-9,13H2,1H3/p+1/t11-,16?/m0/s1 +//different phospho interpretation +phosphobenzene InChI=1S/C6H5O2P/c7-9(8)6-4-2-1-3-5-6/h1-5H +6-phospho-2-O-methyl-D-mannose InChI=1S/C7H15O9P/c1-15-5(2-8)7(11)6(10)4(9)3-16-17(12,13)14/h2,4-7,9-11H,3H2,1H3,(H2,12,13,14)/t4-,5-,6-,7-/m1/s1 +bicyclo[5.4.0]-7-undecene InChI=1/C11H18/c1-2-6-10-8-4-5-9-11(10)7-3-1/h8,11H,1-7,9H2 +spiro[4.5]-2-decene InChI=1S/C10H16/c1-2-6-10(7-3-1)8-4-5-9-10/h4-5H,1-3,6-9H2 +glutamylglycine InChI=1S/C7H12N2O5/c8-4(1-2-5(10)11)7(14)9-3-6(12)13/h4H,1-3,8H2,(H,9,14)(H,10,11)(H,12,13)/t4-/m0/s1 +2'-Deoxy-5-azacytidylyl-(3'→5')-2'-deoxyguanosine InChI=1S/C18H24N9O10P/c19-16-22-6-27(18(31)25-16)12-2-8(9(3-28)35-12)37-38(32,33)34-4-10-7(29)1-11(36-10)26-5-21-13-14(26)23-17(20)24-15(13)30/h5-12,28-29H,1-4H2,(H,32,33)(H2,19,25,31)(H3,20,23,24,30)/t7-,8-,9+,10+,11+,12+/m0/s1 +4-benzofuran-2-yl-2-methyl-1,2,3,4-tetrahydroisoquinolin-4-ol InChI=1S/C18H17NO2/c1-19-11-14-7-2-4-8-15(14)18(20,12-19)17-10-13-6-3-5-9-16(13)21-17/h2-10,20H,11-12H2,1H3 diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/multiplicativeNomenclature.txt 
opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/multiplicativeNomenclature.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/multiplicativeNomenclature.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/multiplicativeNomenclature.txt 2017-07-23 20:55:18.000000000 +0000 @@ -16,3 +16,4 @@ 2,2'-((ethane-1,2-diylbis(azanylylidene))bis(methanylylidene))diphenol InChI=1/C16H16N2O2/c19-15-7-3-1-5-13(15)11-17-9-10-18-12-14-6-2-4-8-16(14)20/h1-8,11-12,19-20H,9-10H2 2,2'-[ethane-1,2-diylidenebis(azanylylidenemethanylylidene)]bis(cyclohexan-1-ol) InChI=1/C16H24N2O2/c19-15-7-3-1-5-13(15)11-17-9-10-18-12-14-6-2-4-8-16(14)20/h9-12,15-16,19-20H,1-8H2 2,2'-((ethane-1,2-diylidenebis(azanylylidene))bis(methanylylidene))dicyclohexanol InChI=1/C16H24N2O2/c19-15-7-3-1-5-13(15)11-17-9-10-18-12-14-6-2-4-8-16(14)20/h9-12,15-16,19-20H,1-8H2 +tetramethylethylenediamine InChI=1S/C6H16N2/c1-7(2)5-6-8(3)4/h5-6H2,1-4H3 diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/omittedSpaces.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/omittedSpaces.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/omittedSpaces.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/omittedSpaces.txt 2017-07-23 20:55:18.000000000 +0000 @@ -7,6 +7,9 @@ ethyloxalate InChI=1/C4H6O4/c1-2-8-4(7)3(5)6/h2H2,1H3,(H,5,6)/p-1/fC4H5O4/q-1 diethyloxalate InChI=1/C6H10O4/c1-3-9-5(7)6(8)10-4-2/h3-4H2,1-2H3 diethylsuccinate InChI=1/C8H14O4/c1-3-11-7(9)5-6-8(10)12-4-2/h3-6H2,1-2H3 +ethylphenylacetate InChI=1S/C10H12O2/c1-2-12-10(11)8-9-6-4-3-5-7-9/h3-7H,2,8H2,1H3 +//Note that this is a fudge to give the expected real-world interpretation +tert-butyl(phenyl)carbamate InChI=1S/C11H15NO2/c1-11(2,3)14-10(13)12-9-7-5-4-6-8-9/h4-8H,1-3H3,(H,12,13) //not omitted space 2-methylacetate 
InChI=1/C3H6O2/c1-2-3(4)5/h2H2,1H3,(H,4,5)/p-1/fC3H5O2/q-1 ethylterephthalate InChI=1/C10H10O4/c1-2-6-5-7(9(11)12)3-4-8(6)10(13)14/h3-5H,2H2,1H3,(H,11,12)(H,13,14)/p-2/fC10H8O4/q-2 @@ -15,4 +18,9 @@ ethylmalonate InChI=1/C5H8O4/c1-2-3(4(6)7)5(8)9/h3H,2H2,1H3,(H,6,7)(H,8,9)/p-2/fC5H6O4/q-2 diethylmalonate InChI=1/C7H12O4/c1-3-7(4-2,5(8)9)6(10)11/h3-4H2,1-2H3,(H,8,9)(H,10,11)/p-2/fC7H10O4/q-2 ethylsuccinate InChI=1/C6H10O4/c1-2-4(6(9)10)3-5(7)8/h4H,2-3H2,1H3,(H,7,8)(H,9,10)/p-2/fC6H8O4/q-2 -acetylacetate InChI=1/C4H6O3/c1-3(5)2-4(6)7/h2H2,1H3,(H,6,7)/p-1/fC4H5O3/q-1 \ No newline at end of file +acetylacetate InChI=1/C4H6O3/c1-3(5)2-4(6)7/h2H2,1H3,(H,6,7)/p-1/fC4H5O3/q-1 +diethylcarbamate InChI=1S/C5H11NO2/c1-3-6(4-2)5(7)8/h3-4H2,1-2H3,(H,7,8)/p-1 +sodium tert-butyl(phenyl)carbamate InChI=1S/C11H15NO2.Na/c1-11(2,3)12(10(13)14)9-7-5-4-6-8-9;/h4-8H,1-3H3,(H,13,14);/q;+1/p-1 +dimethyl(ethylenedioxy)dicarbamate InChI=1S/C6H12N2O6/c1-11-5(9)7-13-3-4-14-8-6(10)12-2/h3-4H2,1-2H3,(H,7,9)(H,8,10) +bis(aminomethyl)4,4'-(ethylenedioxy)bis(3-methylbenzoate) InChI=1S/C20H24N2O6/c1-13-9-15(19(23)27-11-21)3-5-17(13)25-7-8-26-18-6-4-16(10-14(18)2)20(24)28-12-22/h3-6,9-10H,7-8,11-12,21-22H2,1-2H3 +chloromethyl ether InChI=1/C2H4Cl2O/c3-1-5-2-4/h1-2H2 \ No newline at end of file diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/spiro.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/spiro.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/spiro.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/spiro.txt 2017-07-23 20:55:18.000000000 +0000 @@ -27,4 +27,20 @@ 1-Oxaspiro[4.5]dec-2-yl InChI=1S/C9H15O/c1-2-5-9(6-3-1)7-4-8-10-9/h8H,1-7H2 Cyclohexanespiro-2'-(tetrahydrofuran)-5'-yl InChI=1S/C9H15O/c1-2-5-9(6-3-1)7-4-8-10-9/h8H,1-7H2 Spiro[benzofuran-2(3H),1'-cyclohexan]-4'-yl InChI=1S/C13H15O/c1-4-8-13(9-5-1)10-11-6-2-3-7-12(11)14-13/h1-3,6-7H,4-5,8-10H2 
-Spiro[naphthalene-2(3H),2'-thian]-4'-yl InChI=1S/C14H15S/c1-2-6-13-11-14(8-3-4-10-15-14)9-7-12(13)5-1/h1-3,5-7,11H,4,8-10H2 \ No newline at end of file +Spiro[naphthalene-2(3H),2'-thian]-4'-yl InChI=1S/C14H15S/c1-2-6-13-11-14(8-3-4-10-15-14)9-7-12(13)5-1/h1-3,5-7,11H,4,8-10H2 +5lambda^5-arsaspiro[4.4]nonan-5-ylium InChI=1S/C8H16As/c1-2-6-9(5-1)7-3-4-8-9/h1-8H2/q+1 +5-arsoniaspiro[4.4]nonane InChI=1S/C8H16As/c1-2-6-9(5-1)7-3-4-8-9/h1-8H2/q+1 +5lambda^5-phosphaspiro[4.4]nonan-5-uide InChI=1S/C8H18P/c1-2-6-9(5-1)7-3-4-8-9/h1-9H2/q-1 +5lambda^5-phosphanuidaspiro[4.4]nonane InChI=1S/C8H18P/c1-2-6-9(5-1)7-3-4-8-9/h1-9H2/q-1 +5lambda^5,5'-spirobi[benzo[b]phosphindol]-5-ylium InChI=1S/C24H16P/c1-5-13-21-17(9-1)18-10-2-6-14-22(18)25(21)23-15-7-3-11-19(23)20-12-4-8-16-24(20)25/h1-16H/q+1 +9-phosphonia-9,9'-spirobi[fluorene] InChI=1S/C24H16P/c1-5-13-21-17(9-1)18-10-2-6-14-22(18)25(21)23-15-7-3-11-19(23)20-12-4-8-16-24(20)25/h1-16H/q+1 +5,5'-spirobi[benzo[b]phosphindolium] InChI=1S/C24H16P/c1-5-13-21-17(9-1)18-10-2-6-14-22(18)25(21)23-15-7-3-11-19(23)20-12-4-8-16-24(20)25/h1-16H/q+1 +5,5'-spirobi[5H-dibenzophospholinium] InChI=1S/C24H16P/c1-5-13-21-17(9-1)18-10-2-6-14-22(18)25(21)23-15-7-3-11-19(23)20-12-4-8-16-24(20)25/h1-16H/q+1 +5lambda7,5',5''-spiroter[benzo[b]phosphindol]-5-ide InChI=1S/C36H24P/c1-7-19-31-25(13-1)26-14-2-8-20-32(26)37(31,33-21-9-3-15-27(33)28-16-4-10-22-34(28)37)35-23-11-5-17-29(35)30-18-6-12-24-36(30)37/h1-24H/q-1 +1H-2lambda5-spiro[isoquinoline-2,2'-pyrido[1,2-a]pyrazin]-2-ylium InChI=1/C17H15N2/c1-2-6-16-13-19(11-8-15(16)5-1)12-10-18-9-4-3-7-17(18)14-19/h1-12,14H,13H2/q+1 +spiro[isoquinoline-2(1H),2'-[2H]pyrido[1,2-a]pyrazinium] InChI=1/C17H15N2/c1-2-6-16-13-19(11-8-15(16)5-1)12-10-18-9-4-3-7-17(18)14-19/h1-12,14H,13H2/q+1 +2'H-3lambda5-spiro[3-azabicyclo[3.2.2]nonane-3,3'-[1,3]oxazol]-3-ylium InChI=1S/C11H18NO/c1-2-11-4-3-10(1)7-12(8-11)5-6-13-9-12/h5-6,10-11H,1-4,7-9H2/q+1 +spiro[3-azabicyclo[3.2.2]nonane-3,3'(2H)-oxazolium] 
InChI=1S/C11H18NO/c1-2-11-4-3-10(1)7-12(8-11)5-6-13-9-12/h5-6,10-11H,1-4,7-9H2/q+1 +spiro[fluorene-9,2'-[3]thiabicyclo[2.2.2]oct[5]ene] InChI=1S/C19H16S/c1-3-7-17-15(5-1)16-6-2-4-8-18(16)19(17)13-9-11-14(20-19)12-10-13/h1-9,11,13-14H,10,12H2 +//Incorrect indicated hydrogen +5'-bromo-1',3'-dihydro-2H,5H-spiro[imidazolidine-4,2'-indene]-2,5-dione InChI=1/C11H9BrN2O2/c12-8-2-1-6-4-11(5-7(6)3-8)9(15)13-10(16)14-11/h1-3H,4-5H2,(H2,13,14,15,16) diff -Nru opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/stereochemistry.txt opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/stereochemistry.txt --- opsin-1.5.0/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/stereochemistry.txt 1970-01-01 00:00:00.000000000 +0000 +++ opsin-2.3.1/opsin-inchi/src/test/resources/uk/ac/cam/ch/wwmm/opsin/stereochemistry.txt 2017-07-23 20:55:18.000000000 +0000 @@ -0,0 +1,6 @@ +(3xi)-threonine InChI=1S/C4H9NO3/c1-2(6)3(5)4(7)8/h2-3,6H,5H2,1H3,(H,7,8)/t2?,3-/m0/s1 +D-(+)-glucose InChI=1S/C6H12O6/c7-1-3(9)5(11)6(12)4(10)2-8/h1,3-6,8-12H,2H2/t3-,4+,5+,6+/m0/s1 +L-2-phenylglycine InChI=1S/C8H9NO2/c9-7(8(10)11)6-4-2-1-3-5-6/h1-5,7H,9H2,(H,10,11)/t7-/m0/s1 +L-2-Aminobutyric acid InChI=1S/C4H9NO2/c1-2-3(5)4(6)7/h3H,2,5H2,1H3,(H,6,7)/t3-/m0/s1 +D-2-Aminobutyric acid InChI=1S/C4H9NO2/c1-2-3(5)4(6)7/h3H,2,5H2,1H3,(H,6,7)/t3-/m1/s1 +L-5-oxopyrrolidine-2-carboxylic acid InChI=1S/C5H7NO3/c7-4-2-1-3(6-4)5(8)9/h3H,1-2H2,(H,6,7)(H,8,9)/t3-/m0/s1 diff -Nru opsin-1.5.0/pom.xml opsin-2.3.1/pom.xml --- opsin-1.5.0/pom.xml 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/pom.xml 2017-07-23 20:55:18.000000000 +0000 @@ -1,13 +1,13 @@ 4.0.0 - uk.ac.cam.ch.wwmm - wwmm-parent - 4 + org.sonatype.oss + oss-parent + 9 uk.ac.cam.ch.opsin opsin - 1.5.0 + 2.3.1 pom OPSIN Open Parser for Systematic IUPAC Nomenclature @@ -30,7 +30,6 @@ Daniel Lowe - dl387@cam.ac.uk http://bitbucket.org/dan2097 University of Cambridge http://www.cam.ac.uk @@ -56,6 +55,23 @@ + + + maven-compiler-plugin + 
3.1 + + 1.6 + 1.6 + + + + + maven-source-plugin + 2.2.1 + + true + + maven-assembly-plugin false @@ -63,55 +79,17 @@ fullAssembly.xml - - - uk.ac.cam.ch.wwmm.opsin.NameToStructure - - + + + uk.ac.cam.ch.wwmm.opsin.NameToStructure + + - org.codehaus.mojo - cobertura-maven-plugin - - - false - - - uk.ac.cam.ch.wwmm.* - 80 - 80 - - - - - - uk/ac/cam/ch/wwmm/**/*.class - - - - - - clean - pre-site - - clean - - - - instrument - site - - instrument - cobertura - check - - - - - + org.apache.maven.plugins maven-site-plugin - 3.0 + 3.3 @@ -120,6 +98,7 @@ org.apache.maven.plugins maven-project-info-reports-plugin + 2.7 @@ -137,30 +116,36 @@ org.apache.maven.plugins maven-javadoc-plugin + 2.9.1 org.apache.maven.plugins maven-surefire-report-plugin + 2.17 org.apache.maven.plugins maven-jxr-plugin + 2.4 org.apache.maven.plugins maven-pmd-plugin + 3.1 - 1.5 + 1.6 true org.codehaus.mojo cobertura-maven-plugin + 2.6 org.codehaus.mojo apt-maven-plugin + 1.0-alpha-5 @@ -172,28 +157,14 @@ 1.11-8
- xom - xom - 1.2.5 - - - xml-apis - xml-apis - - - xerces - xercesImpl - - - xalan - xalan - - + org.codehaus.woodstox + woodstox-core-asl + 4.4.1 log4j log4j - 1.2.16 + 1.2.17 junit @@ -218,12 +189,12 @@ commons-io commons-io - 2.0.1 + 2.4 commons-cli commons-cli - 1.2 + 1.3.1
diff -Nru opsin-1.5.0/README.text opsin-2.3.1/README.text --- opsin-1.5.0/README.text 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/README.text 2017-07-23 20:55:18.000000000 +0000 @@ -1,43 +1,45 @@ OPSIN - Open Parser for Systematic IUPAC Nomenclature ===================================================== -__Version 1.5.0 (see ReleaseNotes.txt for what's new in this version)__ -__Contact address: __ +__Version 2.3.1 (see ReleaseNotes.txt for what's new in this version)__ __Source code: __ __Web interface and informational site: __ __License: [Artistic License 2.0](http://opensource.org/licenses/Artistic-2.0)__ -OPSIN is a Java(1.5+) library for IUPAC name-to-structure conversion offering high recall and precision on organic chemical nomenclature. -Supported outputs are SMILES, CML (Chemical Markup Language), InChI (IUPAC International Chemical Identifier) and Standard InChI +OPSIN is a Java(1.6+) library for IUPAC name-to-structure conversion offering high recall and precision on organic chemical nomenclature. 
+Supported outputs are SMILES, CML (Chemical Markup Language) and InChI (IUPAC International Chemical Identifier) ### Simple Usage Examples #### Convert a chemical name to SMILES -`java -jar opsin-1.5.0-jar-with-dependencies.jar -osmi input.txt output.txt` +`java -jar opsin-2.3.1-jar-with-dependencies.jar -osmi input.txt output.txt` where input.txt contains chemical name/s, one per line NameToStructure nts = NameToStructure.getInstance(); String smiles = nts.parseToSmiles("acetonitrile"); #### Convert a chemical name to CML -`java -jar opsin-1.5.0-jar-with-dependencies.jar -ocml input.txt output.txt` +`java -jar opsin-2.3.1-jar-with-dependencies.jar -ocml input.txt output.txt` where input.txt contains chemical name/s, one per line NameToStructure nts = NameToStructure.getInstance(); - Element cml = nts.parseToCML("acetonitrile"); + String cml = nts.parseToCML("acetonitrile"); -#### Convert a chemical name to InChI/StdInChI -`java -jar opsin-1.5.0-jar-with-dependencies.jar -oinchi input.txt output.txt` -`java -jar opsin-1.5.0-jar-with-dependencies.jar -ostdinchi input.txt output.txt` +#### Convert a chemical name to StdInChI/StdInChIKey/InChI with FixedH +`java -jar opsin-2.3.1-jar-with-dependencies.jar -ostdinchi input.txt output.txt` +`java -jar opsin-2.3.1-jar-with-dependencies.jar -ostdinchikey input.txt output.txt` +`java -jar opsin-2.3.1-jar-with-dependencies.jar -oinchi input.txt output.txt` where input.txt contains chemical name/s, one per line NameToInchi nti = new NameToInchi() - String inchi = nti.parseToInchi("acetonitrile"); String stdinchi = nti.parseToStdInchi("acetonitrile"); + String stdinchikey = nti.parseToStdInchiKey("acetonitrile"); + String inchi = nti.parseToInchi("acetonitrile"); +NOTE: OPSIN's non-standard InChI includes an additional layer (FixedH) that indicates which tautomer the chemical name described. StdInChI aims to be tautomer independent. 
### Advanced Usage -OPSIN 1.5.0 allows enabling of the following parameters: +OPSIN 2.3.1 allows enabling of the following options: -* allowRadicals: Allows substituents to be interpretable e.g. allows ethyl be interpretable -* wildcardRadicals: If allowRadicals is enabled, this option uses atom/s in the output to represent radical/s; 'R' in CML and '*' in SMILES e.g. changes the output of ethyl from C[CH2] to CC\* +* allowRadicals: Allows substituents to be interpretable e.g. allows interpretation of "ethyl" +* wildcardRadicals: If allowRadicals is enabled, this option uses atoms in the output to represent radicals: 'R' in CML and '*' in SMILES e.g. changes the output of ethyl from C[CH2] to CC\* * detailedFailureAnalysis: Provides a potentially more accurate reason as to why a chemical name could not be parsed. This is done by parsing the chemical name from right to left. The trade-off for enabling this is slightly increased memory usage. * allowAcidsWithoutAcid: Allows interpretation of acids without the word acid e.g. "acetic" * allowUninterpretableStereo: Allows stereochemistry uninterpretable by OPSIN to be ignored (When used as a library the OpsinResult has a status of WARNING if stereochemistry was ignored) @@ -45,27 +47,31 @@ \*When used as a library this is done by modifying Log4J's logging level e.g. 
`Logger.getLogger("uk.ac.cam.ch.wwmm.opsin").setLevel(Level.DEBUG);` -The usage of these parameters on the command line is described in the command line's help dialog accessible via: -`java -jar opsin-1.5.0-jar-with-dependencies.jar -h` +The usage of these options on the command line is described in the command line's help dialog accessible via: +`java -jar opsin-2.3.1-jar-with-dependencies.jar -h` -These parameters may be controlled using the following code: +These options may be controlled using the following code: NameToStructure nts = NameToStructure.getInstance(); NameToStructureConfig ntsconfig = new NameToStructureConfig(); //a new NameToStructureConfig starts as a copy of OPSIN's default configuration ntsconfig.setAllowRadicals(true); OpsinResult result = nts.parseChemicalName("acetonitrile", ntsconfig); - Element cml = result.getCml(); + String cml = result.getCml(); String smiles = result.getSmiles(); - String inchi = NameToInchi.convertResultToInChI(result); String stdinchi = NameToInchi.convertResultToStdInChI(result); -`result.getStatus()` may be checked to see if the conversion was successful. If conversion was unsuccessful the output will always be null. -Note that (std)InChI cannot be generated for polymers or radicals generated in combination with the wildcardRadicals option +`result.getStatus()` may be checked to see if the conversion was successful. +If a structure was generated but OPSIN believes there may be a problem a status of WARNING is returned. Currently this may occur if the name appeared to be ambiguous or stereochemistry was ignored. +By default only optical rotation specification is ignored (this cannot be converted to stereo-configuration algorithmically). + +Convenience methods like `result.nameAppearsToBeAmbiguous()` may be used to check the cause of the warning. 
+ +NOTE: (Std)InChI cannot be generated for polymers or radicals generated in combination with the wildcardRadicals option ### Availability OPSIN is available as a standalone JAR from Bitbucket, -`opsin-1.5.0-jar-with-dependencies.jar` can be executed as a commandline application or added to the classpath for library usage. +`opsin-2.3.1-jar-with-dependencies.jar` can be executed as a commandline application or added to the classpath for library usage. OPSIN is also available from the Maven Central Repository for users of Apache Maven. If you are using Maven then add the following to your pom.xml: @@ -73,7 +79,7 @@ uk.ac.cam.ch.opsin opsin-core - 1.5.0 + 2.3.1 If you need just CML or SMILES output support @@ -83,11 +89,18 @@ uk.ac.cam.ch.opsin opsin-inchi - 1.5.0 + 2.3.1 if you also need InChI output support. +#### Building from source +To build OPSIN from source, download Maven 3 and download OPSIN's source code. + +Running `mvn package assembly:assembly` in the root of OPSIN's source will build the jar with dependencies + +Running `mvn assembly:assembly` in the opsin-core folder will build the "excludingInChI-jar-with-dependencies" + ### About OPSIN The workings of OPSIN are more fully described in: @@ -96,7 +109,7 @@ Daniel M. Lowe, Peter T. Corbett, Peter Murray-Rust, Robert C. Glen Journal of Chemical Information and Modeling 2011 51 (3), 739-753 -If you use OPSIN to produce results for publication, then it would be great if you could cite us. +If you use OPSIN in your work, then it would be great if you could cite us. The following list broadly summarises what OPSIN can currently do and what will be worked on in the future. @@ -129,12 +142,13 @@ * Simple bridge prefixes e.g. 
methano * Specification of oxidation numbers and charge on elements * Perhalogeno terms -* Subtractive prefixes: deoxy, dehydro, anhydro +* Subtractive prefixes: deoxy, dehydro, anhydro, demethyl, deamino * Stoichiometry ratios and mixture indicators * Nucleosides, (oligo)nucleotides and their esters * Carbohydrate nomenclature * Simple CAS names including inverted CAS names * Steroids including alpha/beta stereochemistry +* Isotopic labelling * E/Z/R/S stereochemistry * cis/trans indicating relative stereochemistry on rings and as a synonym of E/Z @@ -142,7 +156,7 @@ * Other less common stereochemical terms * Most natural Products other than steroids * Natural product specific nomenclature operations -* Multiplied, unsaturated or composite bridge prefixes e.g. epoxymethano +* Unsaturated or composite bridge prefixes e.g. epoxymethano ### Developers and Contributors * Dr. Daniel Lowe (Current maintainer) @@ -151,5 +165,12 @@ We are thankful for contributions from Albina Asadulina and Rich Apodaca +![YourKit Logo](https://www.yourkit.com/images/yklogo.png) + +OPSIN's developers use YourKit to profile and optimise code. + +YourKit supports open source projects with its full-featured Java Profiler. +YourKit, LLC is the creator of [YourKit Java Profiler](https://www.yourkit.com/java/profiler/index.jsp) and [YourKit .NET Profiler](https://www.yourkit.com/.net/profiler/index.jsp), innovative and intelligent tools for profiling Java and .NET applications. + Good Luck and let us know if you have problems, comments or suggestions! -Bugs may be reported on the project's [issue tracker](https://bitbucket.org/dan2097/opsin/issues). \ No newline at end of file +Bugs may be reported on the project's [issue tracker](https://bitbucket.org/dan2097/opsin/issues). 
diff -Nru opsin-1.5.0/ReleaseNotes.txt opsin-2.3.1/ReleaseNotes.txt --- opsin-1.5.0/ReleaseNotes.txt 2013-07-21 14:02:29.000000000 +0000 +++ opsin-2.3.1/ReleaseNotes.txt 2017-07-23 20:55:18.000000000 +0000 @@ -1,4 +1,113 @@ -Version 1.5.0 +Version 2.3.1 (2017-07-23) +Fixed fused ring numbering algorithm incorrectly numbering some ortho- and peri-fused fused systems involving 7-membered rings +Support P-thio to indicate thiophosphate linkage +Count of isotopic replacements no longer required if locants given +Fixed bug where CIP algorithm could assign priorities to identical substituents +Fixed "DL" before a substituent not assigning the substituted alpha-carbon as racemic stereo +L-stereochemistry no longer assumed on semi-systematic glycine derivatives e.g. phenylglycine +Fixed some cases where substituents like carbonyl should have been part of an implicitly bracketed section +Fixed interpretation of leucinic acid and 3/4/5-pyrazolone + +Version 2.3.0 (2017-02-23) +D/L stereochemistry can now be assigned algorithmically e.g. L-2-aminobutyric acid +Other minor improvements to amino acid support e.g. homoproline added +Extended SMILES added to command-line interface +Names intended to include the triiodide/tribromide anion no longer erroneously have three monohalides +Ambiguity detected when applying unlocanted subtractive prefixes +Better support for adjacent multipliers e.g. ditrifluoroacetic acid +deoxynucleosides are now implicitly 2'-deoxynucleosides +Added support for as a syntax for a superscripted number +Added support for amidrazones +Aluminium hydrides/chlorides/bromides/iodides are now covalently bonded +Fixed names with isotopes less than 10 not being supported +Fixed interpretation of some trivial names that clash with systematic names + +Version 2.2.0 (2016-10-16) +Added support for IUPAC system for isotope specification e.g. (3-14C,2,2-2H2)butane +Added support for specifying deuteration using the Boughton system e.g. 
butane-2,2-d2 +Added support for multiplied bridges e.g. 1,2:3,4-diepoxy +Front locants after a von baeyer descriptor are now supported e.g. bicyclo[2.2.2]-7-octene +onosyl substituents now supported e.g. glucuronosyl +More sugar substituents e.g. glucosaminyl +Improved support for malformed polycyclic spiro names +Support for oximino as a suffix +Added method [NameToStructure.getVersion()] to retrieve OPSIN version number +Allowed bridges to be used as detachable prefixes +Allow odd numbers of hydro to be added e.g. trihydro +Added support for unbracketed R stereochemistry (but not S, for the moment, due to the ambiguity with sulfur locants) +Various minor bug fixes e.g. stereochemistry was incorrect for isovaline +Minor vocabulary improvements + +Version 2.1.0 (2016-03-12) +Added support for fractional multipliers e.g. hemihydrochloride +Added support for abbreviated common salts e.g. HCl +Added support for sandwich compounds e.g. ferrocene +Improved recognition of names missing the last 'e' (common in German) +Support for E/Z directly before double bond indication e.g. 2Z-ylidene, 2Z-ene +Improved support for functional class ethers e.g. "glycerol triglycidyl ether" +Added general support for names involving an ester formed from an alcohol and an ate group +Grignard reagents and certain compounds (e.g. uranium hexafluoride) are now treated as covalent rather than ionic +Added experimental support for outputting extended SMILES. Polymers and attachment points are annotated explicitly +Polymers when output as SMILES now have atom classes to indicate which end of the repeat unit is which +Support * as a superscript indicator e.g.
*6* to mean superscript 6 +Improved recognition of racemic stereochemistry terms +Added general support for names like "beta-alanine N,N-diacetic acid" +Allowed "one" and "ol" suffixes to be used in more cases where another suffix is also present +"ic acid halide" is not interpreted the same as "ic halide" +Fixed some cases where ambiguous operations were not considered ambiguous e.g. monosubstituted phenyl +Improvements/bug fixes to heuristics for detecting when spaces are omitted from ether/ester names +Improved support for stereochemistry in older CAS index names +Many precision improvements e.g. cyclotriphosphazene, thiazoline, TBDMS/TBDPS protecting groups, S-substituted-methionine +Various minor bug fixes e.g. names containing "SULPH" not recognized +Minor vocabulary improvements + +Internal XML Changes: +Synonyms of the same concept are now or-ed rather than being separate entities e.g. tertiary|tert-|t- + +Version 2.0.0 (2015-07-10) +MAJOR CHANGES: +Requires Java 1.6 or higher +CML (Chemical Markup Language) is now returned as a String rather than a XOM Element +OPSIN now attempts to identify if a chemical name is ambiguous. Names that appear ambiguous return with a status of WARNING with the structure provided being one interpretation of the name + +Added support for "alcohol esters" e.g. phenol acetate [meaning phenyl acetate] +Multiplied unlocanted substitution is now more intelligent e.g. all substituents must connect to same group, and degeneracy of atom environments is taken into account +The ester interpretation is now preferred in more cases where a name does not contain a space but the parent is methanoate/ethanoate/formate/acetate/carbamate +Inorganic oxides are now interpreted, yielding structures with [O-2] ions +Added more trivial names of simple molecules +Support for nitrolic acids +Fixed parsing issue where a directly substituted acetal was not interpretable +Fixed certain groups e.g.
phenethyl, not having their suffix attached to a specific location +Corrected interpretation of xanthyl, and various trivial names that look systematic +Name to structure is now ~20% faster +Initialisation time reduced by a third +InChI generation is now ~20% faster +XML processing dependency changed from XOM to Woodstox +Significant internal refactoring +Utility functions designed for internal use are no longer on the public API +Various minor bug fixes + +Internal XML Changes: +Groups lacking a labels attribute now have no locants (previously had ascending numeric locants) +Syntax for addGroup/addHeteroAtom/addBond attributes changed to be easier to parse and allow specification of whether the name is ambiguous if a locant is not provided + +Version 1.6.0 (2014-04-26) +Added API/command-line options to generate StdInchiKeys +Added support for the IUPAC recommended nomenclature for carbohydrate lactones +Added support for boronic acid pinacol esters +Added basic support for specifying chalcogen acid tautomer form e.g. thioacetic S-acid +Fused ring bridges are now numbered +Names with Endo/Exo/Syn/Anti stereochemistry can now be partially interpreted if warnRatherThanFailOnUninterpretableStereochemistry is used +The warnRatherThanFailOnUninterpretableStereochemistry option will now assign as much stereochemistry as OPSIN understands (All ignored stereochemistry terms are mentioned in the OpsinResult message) +Many minor nomenclature support improvements e.g. succinic imide; hexaldehyde; phenyldiazonium, organotrifluoroborates etc. +Added more trivial names that can be confused with systematic names e.g.
Imidazolidinyl urea +Fixed StackOverFlowError that could occur when processing molecules with over 5000 atoms +Many minor bug fixes +Minor vocabulary improvements +Minor speed improvements +NOTE: This is the last release to support Java 1.5 + +Version 1.5.0 (2013-07-21) Command line interface now accepts files to read and write to as arguments Added option to allow interpretation of acids missing the word acid e.g. "acetic" (off by default) Added option to treat uninterpretable stereochemistry as a warning rather than a failure (off by default) @@ -7,7 +116,7 @@ Vocabulary improvements e.g. homo/beta amino acids Many minor bug fixes e.g. fulminic acid correctly interpreted -Version 1.4.0 +Version 1.4.0 (2013-01-27) Added support for dialdoses,diketoses,ketoaldoses,alditols,aldonic acids,uronic acids,aldaric acids,glycosides,oligosacchardides, named systematically or from trivial stems, in cyclic or acyclic form Added support for ketoses named using dehydro Added support for anhydro @@ -17,10 +126,10 @@ Added hydrazido and anilate suffixes Allowed more functional class nomenclature to apply to amino acids Added support for inverting CAS names with substituted functional terms e.g. 
Acetaldehyde, O-methyloxime -Double substitution of a deoxy chiral centre now uses the CIP rules to decide which substituent replaced the hydroxy group +Double substitution of a deoxy chiral centre now uses the CIP rules to decide which substituent replaced the hydroxy group Unicode right arrows, superscripts and the soft hyphen are now recognised -Version 1.3.0 +Version 1.3.0 (2012-09-16) Added option to output radicals as R groups (* in SMILES) Added support for carbolactone/dicarboximide/lactam/lactim/lactone/olide/sultam/sultim/sultine/sultone suffixes Resolved some cases of ambiguity in the grammar; the program's capability to handle longer peptide names is improved @@ -32,7 +141,7 @@ NameToStructure.getInstance() no longer throws a checked exception Many minor bug fixes -Version 1.2.0 +Version 1.2.0 (2011-12-06) OPSIN is now available from Maven Central Basic support for cyclised carbohydrates e.g. alpha-D-glucopyranose Basic support for systematic carbohydrate stems e.g. D-glycero-D-gluco-Heptose @@ -42,7 +151,7 @@ Fixed a few minor bugs/limitations in the Cahn-Ingold-Prelog rules implementation and made more memory efficient Many minor improvements and bug fixes -Version 1.1.0 +Version 1.1.0 (2011-06-16) Significant improvements to fused ring numbering code, specifically 3/4/5/7/8 member rings are no longer only allowed in chains of rings Added support for outputting to StdInChI Small improvements to fused ring building code @@ -51,7 +160,7 @@ Improvements to parsing speed Many minor improvements and bug fixes -Version 1.0.0 +Version 1.0.0 (2011-03-09) Added native isomeric SMILES output Improved command-line interface. The desired format i.e.
CML/SMILES/InChI as well as options such as allowing radicals can now all be specified via flags Debugging is now performed using log4j rather than by passing a verbose flag @@ -68,7 +177,7 @@ Added support for R/S stereochemistry indicated by a locant which is also used to indicate the point of substitution for a substituent Many minor improvements and bug fixes -Version 0.9.0 +Version 0.9.0 (2010-11-01) Added transition metals/f-block elements and noble gases Added support for specifying the charge or oxidation number on elements e.g. aluminium(3+), iron(II) Calculations based off a van Arkel diagram are now used to determine whether functional bonds to metals should be treated as ionic or covalent @@ -87,7 +196,7 @@ Esters of biochemical compounds e.g. triphosphates are now supported Many minor improvements and bug fixes -Version 0.8.0 +Version 0.8.0 (2010-07-16) NameToStructureConfig can now be used to configure whether radicals e.g. ethyl are output or not. Names like carbon tetrachloride are now supported glycol ethers e.g. ethylene glycol ethyl ether are now supported @@ -100,7 +209,7 @@ Parsing is now even faster Various bug fixes and name interpretation fixes -Version 0.7.0 +Version 0.7.0 (2010-06-09) Added full support for conjunctive nomenclature e.g. 1,3,5-benzenetriacetic acid Added basic support for CAS names Added trivial poly-noncarboxylic acids and more trivial carboxylic acids @@ -117,7 +226,7 @@ Mixtures specified by separating components by semicolonspace are now supported Many internal improvements and bug fixes -Version 0.6.1 +Version 0.6.1 (2010-03-18) Counter ions are now duplicated such as to lead to if possible a neutral compound In names like nitrous amide the atoms modified by the functional replacement can now be substituted Allowed ~number~ for specifying superscripts @@ -126,7 +235,7 @@ Tetrahedral sulfur stereochemistry is now recognised Bug fixes to fix incorrect interpretation of some names e.g.
triphosgene is now unparseable rather than 3 x phosgene, phospho has different meanings depending on whether it is used on an amino acid or another group etc. -Version 0.6.0 +Version 0.6.0 (2010-02-18) OPSIN is now a mavenised project consisting of two modules: core and inchi. Core does name -->CML, inchi depends on core and allows conversion to inchi Instead of CML an OpsinResult can be returned which can yield information as to why a name was not interpretable Added support for unlocanted R/S/E/Z stereochemistry. Removed limit on number of atoms that stereochemistry code can handle @@ -152,7 +261,7 @@ Sulph is now treated like sulf as in sulphuric acid and many misc fixes and improvements -Version 0.5.3 +Version 0.5.3 (2009-10-22) Added support for amic, aldehydic, anilic, anilide, carboxanilide and amoyl suffixes Added support for cyclic imides e.g. succinimide/succinimido Added support for amide functional class @@ -162,7 +271,7 @@ Slight improvement in method for deciding which group detachable hydro prefixes apply to. Minor vocabulary update -Version 0.5.2 +Version 0.5.2 (2009-10-04) Outputting directly to InChI is now supported using the separately available nameToInchi jar (an OPSIN jar is expected in the same location as the nameToInchi jar) Fused rings with any number of rings in a chain or formed entirely of 6 membered rings can now be numbered Added support for E/Z/R/S where locants are given. Unlocanted cases will be dealt with in a subsequent release. In very large molecules a lack of memory may be encountered, this will be resolved in a subsequent release @@ -175,7 +284,7 @@ Removed dependence on Nux/Saxon Misc minor fixes -Version 0.5.1 +Version 0.5.1 (2009-07-20) Huge reduction in OPSIN initialisation time (typical ~7 seconds -->800ms) Allowed thio/seleno/telluro as divalent linkers and for functional replacement when used as prefixes.
Peroxy can now be used for functional replacement Better support for semi-trivially named hydrocarbon fused rings e.g. tetracene @@ -184,8 +293,8 @@ Support for names like triethyltetramine and triethylene glycol Misc other fixes to prevent OPSIN generating the wrong structure for certain types of names -Version 0.5 +Version 0.5 (2009-06-23) Too many changes to list -Version 0.1 +Version 0.1 (2006-10-11) Initial release \ No newline at end of file