From c864faba0be36d702b4cea5f01dbee827e9d607a Mon Sep 17 00:00:00 2001 From: luckylionheart Date: Thu, 16 Oct 2025 13:53:20 -0400 Subject: [PATCH 1/2] WIP: fix #448 --- index.js | 4320 ++++++++++++++++++++++++++----------------------- test_nlp.json | 542 ++++--- 2 files changed, 2628 insertions(+), 2234 deletions(-) diff --git a/index.js b/index.js index 1f555ed..f85d004 100755 --- a/index.js +++ b/index.js @@ -13,2042 +13,2322 @@ * Own matchAll used for the chemical formulae */ function getMatchAll(inputstr, regexp) { - const matches = []; - inputstr.replace(regexp, function (...args) { - const arr = [].slice.call(args, 0), - extras = arr.splice(-2); - arr.index = extras[0]; - arr.input = extras[1]; - matches.push(arr); - }); - return matches.length ? matches : null; + const matches = []; + inputstr.replace(regexp, function (...args) { + const arr = [].slice.call(args, 0), + extras = arr.splice(-2); + arr.index = extras[0]; + arr.input = extras[1]; + matches.push(arr); + }); + return matches.length ? matches : null; } function capitalize(string) { - return string.charAt(0).toUpperCase() + string.slice(1); + return string.charAt(0).toUpperCase() + string.slice(1); } const OptimadeNLP = function () { - /* - * Definitions - */ - const stop_words = [ - 'a', - 'about', - 'above', - 'after', - 'again', - 'against', - 'all', - 'am', - 'an', - 'and', - 'any', - 'are', - "aren't", - 'as', - 'at', - 'be', - 'because', - 'been', - 'before', - 'being', - 'below', - 'between', - 'both', - 'but', - 'by', - "can't", - 'cannot', - 'could', - "couldn't", - 'did', - "didn't", - 'do', - 'does', - "doesn't", - 'doing', - "don't", - 'down', - 'during', - 'each', - 'few', - 'for', - 'from', - 'further', - 'had', - "hadn't", - 'has', - "hasn't", - 'have', - "haven't", - 'having', - 'he', - "he'd", - "he'll", - "he's", - 'her', - 'here', - "here's", - 'hers', - 'herself', - 'him', - 'himself', - 'his', - 'how', - "how's", - 'i', - "i'd", - "i'll", - "i'm", - "i've", - 'if', - 'in', - 'into', - 'is', - "isn't", - 'it', - "it's", - 'its', - 'itself', - "let's", - 'me', - 'more', - 'most', - "mustn't", - 'my', - 'myself', - 'no', - 'nor', - 'not', - 'of', - 'off', - 'on', - 'once', - 'only', - 'or', - 'other', - 'ought', - 'our', - 'ours', - 'ourselves', - 'out', - 'over', - 'own', - 'same', - "shan't", - 'she', - "she'd", - "she'll", - "she's", - 'should', - "shouldn't", - 'so', - 'some', - 'such', - 'than', - 'that', - "that's", - 'the', - 'their', - 'theirs', - 'them', - 'themselves', - 'then', - 'there', - "there's", - 'these', - 'they', - "they'd", - "they'll", - "they're", - "they've", - 'this', - 'those', - 'through', - 'to', - 'too', - 'u', - 'under', - 'until', - 'up', - 'very', - 'was', - "wasn't", - 'we', - "we'd", - "we'll", - "we're", - "we've", - 'were', - "weren't", - 'what', - "what's", - 'when', - "when's", - 'where', - "where's", - 'which', - 'while', - 'who', - "who's", - 'whom', - 'why', - "why's", - 'with', - "won't", - 'would', - "wouldn't", - 'you', - "you'd", - "you'll", - "you're", - "you've", - 'your', - 'yours', - 'yourself', - 'yourselves', - ]; /* exact */ - - const arity_keys = [ - null, - 'unary', - 'binary', - 'ternary', - 'quaternary', - 'quinary', - 'multinary', - 'multinary', - 'multinary', - 'multinary', - 'multinary', - ]; // NB null is for "0-ary" - - const periodic_elements = [ - 'h', - 'he', - 'li', - 'be', - 'b', - 'c', - 'n', - 'o', - 'f', - 'ne', - 'na', - 'mg', - 'al', - 'si', - 'p', - 's', - 'cl', - 'ar', - 'k', - 'ca', - 'sc', - 'ti', - 'v', - 'cr', - 'mn', - 'fe', - 'co', - 'ni', - 'cu', - 'zn', - 'ga', - 'ge', - 'as', - 'se', - 'br', - 'kr', - 'rb', - 'sr', - 'y', - 'zr', - 'nb', - 'mo', - 'tc', - 'ru', - 'rh', - 'pd', - 'ag', - 'cd', - 'in', - 'sn', - 'sb', - 'te', - 'i', - 'xe', - 'cs', - 'ba', - 'la', - 'ce', - 'pr', - 'nd', - 'pm', - 'sm', - 'eu', - 'gd', - 'tb', - 'dy', - 'ho', - 'er', - 'tm', - 'yb', - 'lu', - 'hf', - 'ta', - 'w', - 're', - 'os', - 'ir', - 'pt', - 'au', - 'hg', - 'tl', - 'pb', - 'bi', - 'po', - 'at', - 'rn', - 'fr', - 'ra', - 'ac', - 'th', - 'pa', - 'u', - 'np', - 'pu', - 'am', - 'cm', - 'bk', - 'cf', - 'es', - 'fm', - 'md', - 'no', - 'lr', - 'rf', - 'db', - 'sg', - 'bh', - 'hs', - 'mt', - 'ds', - 'rg', - 'cn', - 'nh', - 'fl', - 'mc', - 'lv', - 'ts', - 'og' - ]; /* exact */ - - const periodic_elements_cased = periodic_elements.map(function (x) { - return capitalize(x); - }); - - const periodic_element_names = [ - 'hydrogen', - 'helium', - 'lithium', - 'beryllium', - 'boron', - 'carbon', - 'nitrogen', - 'oxygen', - 'fluorine', - 'neon', - 'sodium', - 'magnesium', - 'aluminium', - 'silicon', - 'phosphorus', - 'sulfur', - 'chlorine', - 'argon', - 'potassium', - 'calcium', - 'scandium', - 'titanium', - 'vanadium', - 'chromium', - 'manganese', - 'iron', - 'cobalt', - 'nickel', - 'copper', - 'zinc', - 'gallium', - 'germanium', - 'arsenic', - 'selenium', - 'bromine', - 'krypton', - 'rubidium', - 'strontium', - 'yttrium', - 'zirconium', - 'niobium', - 'molybdenum', - 'technetium', - 'ruthenium', - 'rhodium', - 'palladium', - 'silver', - 'cadmium', - 'indium', - 'tin', - 'antimony', - 'tellurium', - 'iodine', - 'xenon', - 'caesium', - 'barium', - 'lanthanum', - 'cerium', - 'praseodymium', - 'neodymium', - 'promethium', - 'samarium', - 'europium', - 'gadolinium', - 'terbium', - 'dysprosium', - 'holmium', - 'erbium', - 'thulium', - 'ytterbium', - 'lutetium', - 'hafnium', - 'tantalum', - 'tungsten', - 'rhenium', - 'osmium', - 'iridium', - 'platinum', - 'gold', - 'mercury', - 'thallium', - 'lead', - 'bismuth', - 'polonium', - 'astatine', - 'radon', - 'francium', - 'radium', - 'actinium', - 'thorium', - 'protactinium', - 'uranium', - 'neptunium', - 'plutonium', - 'americium', - 'curium', - 'berkelium', - 'californium', - 'einsteinium', - 'fermium', - 'mendelevium', - 'nobelium', - 'lawrencium', - 'rutherfordium', - 'dubnium', - 'seaborgium', - 'bohrium', - 'hassium', - 'meitnerium', - 'darmstadium', - 'roentgenium', - 'copernicium', - 'nihonium', - 'flerovium', - 'moscovium', - 'livermorium', - 'tennessine', - 'oganesson' - ]; /* fuzzy */ - - const lat_p2i = { - cubic: 1, - hexagonal: 2, - trigonal: 3, - tetragonal: 4, - orthorhombic: 5, - monoclinic: 6, - triclinic: 7, - rhombohedral: 3, - cub: 1, - hex: 2, - hexag: 2, - trig: 3, - tet: 4, - tetr: 4, - tetrag: 4, - orth: 5, - ortho: 5, - monocl: 6, - tric: 7, - tricl: 7, - rhom: 3, - rhomb: 3, - }; - const lat_fgrs = Object.keys(lat_p2i); - const lat_i2p = { - 1: 'cubic', - 2: 'hexagonal', - 3: 'trigonal', - 4: 'tetragonal', - 5: 'orthorhombic', - 6: 'monoclinic', - 7: 'triclinic', - }; - - const mpds_classes = [ - 'ab initio calculations', - 'ab initio literature', - 'actinoid', - 'adamantane', - 'aegirine', - 'alkali', - 'alkaline', - 'allargentum', - 'almandine', - 'alum', - 'alunogen', - 'amide', - 'analcime', - 'anatase', - 'anorpiment', - 'anorthoclase', - 'antiferroelectric', - 'antiferromagnet', - 'antiferromagnetic', - 'arsenate', - 'arsenide', - 'ashcroftine', - 'auricupride', - 'aurocupride', - 'azide', - 'baileychlore', - 'bariopyrochlore', - 'baryte', - 'beryl', - 'beta-alumina', - 'beta-boron', - 'biguanide', - 'binary', - 'birefringent', - 'borane', - 'borate', - 'borax', - 'boride', - 'borocarbide', - 'borohydride', - 'boronitride', - 'botryogen', - 'bromanilate', - 'bromide', - 'bromoimide', - 'calomel', - 'carbamate', - 'carbide', - 'carbonate', - 'carbonyl', - 'carboxylate', - 'celestine', - 'cell and atoms', - 'cell-only', - 'celsian', - 'cesiokenopyrochlore', - 'chalcogen', - 'charge-density wave state', - 'chevrel', - 'chimney-ladder', - 'chloranilate', - 'chlorate', - 'chloride', - 'chloritoid', - 'chlorosulfate', - 'chromate', - 'chrysoberyl', - 'chrysotile', - 'cinnabar', - 'clathrate', - 'clinochlore', - 'clinoclase', - 'clodronate', - 'close-packed', - 'cluster glass', - 'colossal magnetoresistance', - 'conductor', - 'corundum', - 'cosmochlor', - 'croconate violet', - 'croconate', - 'cryptomelane', - 'cuprate', - 'cuspidine', - 'cyamelurate', - 'cyanamide', - 'cyanamidonitrate', - 'cyananilate', - 'cyanide', - 'cyanotetrazolate', - 'cyanoureate', - 'cyanurate', - 'cyprine', - 'davyne', - 'deuteride', - 'deuterium', - 'devilline', - 'diamagnetic', - 'diamond', - 'diarsenate', - 'diaspore', - 'diazanide', - 'diazenide', - 'dichromate', - 'digermanate', - 'diiodobromide', - 'dinitramide', - 'diopside', - 'dioptase', - 'dioxobromate', - 'dioxoiodate', - 'dioxosulfate', - 'dioxothiosulfate', - 'diphosphate', - 'diphosphonate', - 'dipolyhedral', - 'diselenate', - 'disilicate', - 'disordered', - 'disulfate', - 'dithiocarbamate', - 'dithiocarbonate', - 'dithionate', - 'dithiooxalate', - 'dithiophosphate', - 'dithiosquarate', - 'divanadate', - 'epidote', - 'euchlorine', - 'euclase', - 'eudialyte', - 'eulytine', - 'fermi liquid', - 'feroxihyte', - 'feroxyhyte', - 'ferrielectric', - 'ferrimagnet', - 'ferroelastic', - 'ferroelectric', - 'ferromagnet', - 'ferromagnetic', - 'fluor-schorl', - 'fluoride', - 'fluoroborate', - 'frank-kasper', - 'friauf-laves', - 'fulleride', - 'fulminate', - 'galena', - 'gamma-brass', - 'garnet', - 'giant magnetocaloric effect', - 'gismondine', - 'glaucodot', - 'glaucophane', - 'grossular', - 'guanidinate', - 'gypsum', - 'hafnon', - 'half metal', - 'halogen', - 'hard magnet', - 'harmotome', - 'haueyne', - 'heavy fermion', - 'hedyphane', - 'helimagnet', - 'helvine', - 'hexasulfate', - 'hexathionate', - 'host-guest', - 'humboldtine', - 'hydrate', - 'hydride', - 'hydroxide', - 'hypercinnabar', - 'hypophosphate', - 'ice', - 'imide', - 'iminate', - 'intercalation', - 'intermediate valence', - 'intermetallic', - 'iodate', - 'iodide', - 'ionic conductor', - 'iridium', - 'isoferroplatinum', - 'isopolyhedral', - 'isothermal section', - 'kornerupine', - 'kosmochlor', - 'lanthanoid', - 'lavendulan', - 'levyne', - 'lime', - 'liquidus projection', - 'litharge', - 'lithiophosphate', - 'luminescent', - 'machine learning', - 'machine-learning', - 'magnesiochloritoid', - 'magnetoelastic', - 'magnetoelectric', - 'manganate', - 'massicot', - 'mellitate', - 'melonate', - 'metacinnabar', - 'metal', - 'metalloid', - 'metamagnet', - 'metavoltine', - 'mica', - 'microcline', - 'microline', - 'mictomagnet', - 'minium', - 'molybdate', - 'multiferroic', - 'multinary', - 'nasicon', - 'natron', - 'natrophosphate', - 'natroxalate', - 'negative thermal expansion', - 'nepheline', - 'nickeline', - 'niobocarbide', - 'niter', - 'nitranilate', - 'nitrate', - 'nitratine', - 'nitride', - 'nitroformate', - 'noble gas', - 'non disordered', - 'non-disordered', - 'non-linear optics', - 'nonaflate', - 'nonmetal', - 'nordenskioeldine', - 'nosean', - 'olivine', - 'optically isotropic', - 'organic', - 'orpiment', - 'orthoborate', - 'orthoclase', - 'orthogermanate', - 'orthonitrate', - 'orthophosphate', - 'orthosilicate', - 'oxalate', - 'oxamate', - 'oxide', - 'oxoiodate', - 'oxonitrate', - 'oxotetrazolate', - 'oxy-schorl', - 'ozonide', - 'palladogermanide', - 'paracelsian', - 'paramagnet', - 'pararealgar', - 'pauli paramagnet', - 'peer review', - 'peer reviewed', - 'peer-review', - 'peer-reviewed', - 'perchlorate', - 'perhydrate', - 'periclase', - 'periodate', - 'permanganate', - 'pernitride', - 'peroxide', - 'pertechnetate', - 'phosphate', - 'phosphide', - 'phosphinate', - 'phosphonate', - 'photocatalyst', - 'photovoltaic effect', - 'photovoltaic', - 'piezoelectric', - 'pnictogen', - 'polaron conductor', - 'polycrase', - 'potassic', - 'prism', - 'prussian blue', - 'pseudorutile', - 'pyrochlore', - 'pyroelectric', - 'pyrope', - 'quadridavyne', - 'quartz', - 'quasicrystal', - 'quaternary', - 'quinary', - 'radioactive', - 'rare earth', - 'realgar', - 'refractory', - 'relaxor', - 'retzian', - 'rhodarsenide', - 'rhodizonate', - 'rhomboclase', - 'rocksalt', - 'rubicline', - 'ruddlesden-popper', - 'rutheniridosmine', - 'rutherfordine', - 'rutile', - 'salammoniac', - 'sanidine', - 'sapphirine', - 'sarcopside', - 'schorl', - 'selenide', - 'selenidel', - 'semiconductor', - 'semimetal', - 'shape memory effect', - 'shape memory', - 'siderotil', - 'silanide', - 'silicate', - 'silicide', - 'sillen-aurivillius', - 'skyrmion', - 'solidus projection', - 'solvus projection', - 'spessartine', - 'spin glass', - 'spinel', - 'spodumene', - 'squarate', - 'steenstrupine', - 'stibarsen', - 'subsolidus relations', - 'sulfamate', - 'sulfamide', - 'sulfate', - 'sulfide', - 'sulfidel', - 'sulfinylamide', - 'superconductor', - 'superhard', - 'superionic conductor', - 'superoxide', - 'talc', - 'tantalcarbide', - 'tellurantimony', - 'telluride', - 'ternary', - 'tetrahedron', - 'thermoelectric', - 'thiocyanate', - 'thiocyanurate', - 'thiophosphate', - 'thiosulfate', - 'topaz', - 'topological insulator', - 'tourmaline', - 'transitional', - 'triflate', - 'tripolyhedral', - 'triteride', - 'trithionate', - 'tritide', - 'tritium', - 'trona', - 'tungstate', - 'turquoise', - 'ulvoespinel', - 'unary', - 'uranophane', - 'uranopolycrase', - 'urate', - 'urea', - 'ureate', - 'van vleck paramagnet', - 'vanadate', - 'vertical section', - 'violurate', - 'xenotime', - 'zincobotryogen', - 'zircon', - 'zircosulfate', - ]; - - const mpds_props = [ - 'acceptor concentration', - 'acceptor to donor concentration', - 'activation energy', - 'adiabatic bulk modulus', - 'angle-resolved photoelectron spectra', - 'atomic structure', - 'band gap', - 'birefringence', - 'bremsstrahlung isochromat spectra', - 'charge carrier concentration', - 'charge carrier mobility', - 'charge transfer', - 'charge-density wave', - 'charge-transfer energy', - 'coefficient of schottky term in heat capacity', - 'coercive electric field', - 'coercive field', - 'coherence length', - 'cohesive energy', - 'compressibility', - 'conductivity', - 'core-electron contribution to magnetic susceptibility', - 'critical current density', - 'critical magnetic field', - 'crystal electric field parameter', - 'crystal electric field parameters', - 'crystal electric field splitting', - 'crystal electric field', - 'crystal field level', - 'crystalline structure', - 'crystal cell', - 'crystal structure', - 'curie coefficient', - 'curie temperature', - 'curie-weiss paramagnetism', - 'debye temperature', - 'decomposition temperature', - 'decomposition', - 'diamagnetic contribution to magnetic susceptibility', - 'dielectric constant', - 'dielectric loss tangent', - 'diffusion', - 'donor concentration', - 'donor energy', - 'effective charge', - 'effective electron number', - 'effective mass of electrons to holes ratio', - 'effective mass of electrons', - 'effective mass', - 'einstein temperature', - 'elastic compliance', - 'elastic moduli', - 'elastic stiffness coefficient', - 'elasticity', - 'electric field gradient', - 'electric polarization', - 'electrical conductivity', - 'electric properties', - 'electrical properties', - 'electrical resistivity', - 'electrochemical impedance spectroscopy', - 'electron density maps', - 'electron density of states at fermi level', - 'electron density of states', - 'electron energy band structure', - 'electron energy loss spectra', - 'electron grueneisen coefficient', - 'electron mobility', - 'electron paramagnetic resonance spectra', - 'electron spin resonance spectra', - 'electron-phonon interaction parameter', - 'electronic contribution to heat capacity', - 'electronic contribution to thermal conductivity', - 'electronic energy gap', - 'electronic heat capacity coefficient', - 'electronic properties', - 'energy at fermi level', - 'energy band structure', - 'energy gap for direct transition', - 'energy gap for indirect transition', - 'energy level diagram', - 'energy of optical phonon', - 'energy product', - 'energy', - 'enthalpy change at melting point', - 'enthalpy change at phase transition', - 'enthalpy change at structural transition', - 'enthalpy change', - 'enthalpy of formation', - 'enthalpy of reaction', - 'enthalpy', - 'entropy change at melting point', - 'entropy change at phase transition', - 'entropy of formation', - 'entropy of reaction', - 'entropy', - 'eutectoid decomposition', - 'exchange field', - 'exchange interaction parameter', - 'exciton energy', - 'extended x-ray absorption fine structure', - 'extraordinary refractive index', - 'fermi energy', - 'fermi surface', - 'ferroelasticity', - 'ferroelectric curie temperature', - 'ferroelectric hysteresis', - 'ferroelectric neel temperature', - 'ferroelectric transitions', - 'field dependence of resistivity', - 'figure of merit', - 'freezing temperature for spin glass', - 'fusion', - 'gibbs energy change', - 'gibbs energy of formation', - 'gibbs energy of reaction', - 'ginzburg-landau parameter', - 'grueneisen coefficient', - 'gruneisen coefficient', - 'hall coefficient', - 'hall effect', - 'hall mobility', - 'hardness', - 'heat capacity at constant pressure', - 'heat capacity at constant volume', - 'heat capacity coefficient', - 'heat capacity discontinuity at structural transition', - 'heat capacity discontinuity at superconducting transition', - 'heat capacity discontinuity', - 'heat capacity', - 'high-frequency permittivity', - 'hole mobility', - 'hydrogen diffusion', - 'hyperfine magnetic field', - 'imaginary part of magnetic susceptibility', - 'imaginary part of permittivity', - 'inelastic neutron scattering', - 'inelastic x-ray scattering', - 'infrared spectra', - 'ionic conductivity', - 'irreversibility field', - 'isomer shift', - 'isothermal bulk modulus', - 'isothermal linear compressibility', - 'isothermal volume compressibility', - 'knoop hardness', - 'kondo behavior', - 'kondo temperature', - 'lattice', - 'linear magnetostriction', - 'linear thermal expansion coefficient', - 'longitudinal sound velocity', - 'longitudinal-mode elastic coefficient', - 'lorentz number', - 'lower critical magnetic field', - 'lowest temperature of investigation', - 'luminescence lifetime', - 'luminescence', - 'magnetic anisotropy field', - 'magnetic anisotropy', - 'magnetic circular x-ray dichroism', - 'magnetic dichroism', - 'magnetic direction', - 'magnetic entropy', - 'magnetic field for magnetic transition', - 'magnetic field for structural transition', - 'magnetic heat capacity', - 'magnetic hysteresis', - 'magnetic moment', - 'magnetic order', - 'magnetic penetration depth', - 'magnetic phase diagram', - 'magnetic properties', - 'magnetic resistivity', - 'magnetic structure', - 'magnetic susceptibility', - 'magnetic transitions', - 'magnetism', - 'magnetization', - 'magneto-optical effects', - 'magneto-optical kerr effect', - 'magnetostriction', - 'mechanical properties', - 'melting temperature', - 'microhardness', - 'moessbauer spectra', - 'mohs hardness', - 'molar volume', - 'molecular field parameter', - 'muon spin spectra', - 'neel temperature', - 'neutron energy loss spectra', - 'non-linear optical properties', - 'non-linear optics', - 'nuclear magnetic resonance spectra', - 'nuclear quadrupolar resonance spectra', - 'optical absorption coefficient', - 'optical absorption', - 'optical conductivity', - 'optical phonons', - 'optical properties', - 'optical spectra', - 'orbital magnetic moment', - 'ordinary refractive index', - 'paraelectric curie coefficient', - 'paraelectric curie temperature', - 'paraelectric state', - 'paramagnetic curie temperature', - 'paramagnetic moment', - 'pauli magnetic susceptibility', - 'peritectic formation', - 'peritectoid formation', - 'permittivity', - 'perturbed angular correlation', - 'phase diagram', - 'phase diagrams', - 'phase transitions', - 'phonon contribution to thermal conductivity', - 'phonon density of states', - 'phonon dispersion', - 'phonon grueneisen coefficient', - 'phonon heat capacity at constant pressure', - 'phonons', - 'photo-conductivity data', - 'photo-conductivity', - 'photoelectron emission spectra', - 'photoluminescence spectra', - 'physical properties', - 'piezoelectric coefficient', - 'piezoelectric coefficient', - 'piezoelectricity', - 'plasma edge', - 'poisson ratio', - 'power factor', - 'pressure derivative of adiabatic bulk modulus', - 'pressure derivative of curie temperature', - 'pressure derivative of elastic stiffness coefficient', - 'pressure derivative of energy gap', - 'pressure derivative of isothermal bulk modulus', - 'pressure derivative of neel temperature', - 'pressure derivative of superconducting transition temperature', - 'pressure derivative of transition temperature', - 'pressure for magnetic transition', - 'pressure for metal-nonmetal transition', - 'pressure for structural transition', - 'pyroelectric coefficient', - 'pyroelectricity', - 'quadrupole splitting', - 'raman spectra', - 'real part of magnetic permeability', - 'real part of magnetic susceptibility', - 'real part of optical conductivity', - 'real part of permittivity', - 'reflectivity', - 'refractive index', - 'relative cooling power', - 'remanent induction', - 'remanent magnetic field', - 'remanent magnetic moment', - 'remanent magnetization', - 'remanent polarization', - 'residual resistivity ratio', - 'residual resistivity', - 'resistivity anisotropy', - 'resistivity', - 'resonance spectra', - 'saturation magnetic moment', - 'saturation magnetization', - 'second-harmonic generation', - 'seebeck coefficient', - 'shear modulus', - 'soft-x-ray emission spectra', - 'sound velocity', - 'spin contribution to magnetic susceptibility', - 'spin magnetic moment', - 'spin-fluctuation temperature', - 'spin-fluctuation', - 'spin-orbit splitting of valence band', - 'spin-resolved electron density of states at fermi level', - 'spontaneous elastic strain', - 'spontaneous magnetic moment', - 'spontaneous magnetization', - 'spontaneous polarization', - 'static permittivity', - 'stoner enhancement factor', - 'stoner parameter', - 'stoner product', - 'structural properties', - 'structural transition', - 'structural transitions', - 'superconducting transition temperature', - 'superconductivity energy gap', - 'superconductivity phenomena', - 'superconductivity', - 'temperature dependence of resistivity', - 'temperature dependence of static permittivity', - 'temperature derivative of elastic stiffness coefficient', - 'temperature derivative of energy gap', - 'temperature derivative of resistivity', - 'temperature derivative of upper critical magnetic field', - 'temperature for congruent melting', - 'temperature for eutectoid decomposition', - 'temperature for ferroelectric reordering', - 'temperature for magnetic transition', - 'temperature for metal-nonmetal transition', - 'temperature for peritectic formation', - 'temperature for peritectoid formation', - 'temperature for structural transition', - 'temperature-independent part of magnetic susceptibility', - 'thermal cell parameters change', - 'thermal conductivity', - 'thermal energy gap', - 'thermal expansion', - 'thermal properties', - 'thermal strain', - 'thermodynamic properties', - 'thermodynamics', - 'thermoelectric figure of merit', - 'thermoelectric power', - 'total energy calculation data', - 'transmittance', - 'transverse sound velocity', - 'type of magnetism', - 'upper critical magnetic field', - 'vacuum ultraviolet photoemission spectra', - 'valence', - 'van vleck contribution to magnetic susceptibility', - 'vibrational spectra', - 'vickers hardness number', - 'volume change at phase transition', - 'volume change at structural transition', - 'volume change', - 'volume magnetostriction', - 'volume thermal expansion coefficient', - 'wavelength for luminescence', - 'wavenumber of longitudinal optical phonon', - 'wavenumber of optical phonon', - 'wavenumber of transverse optical phonon', - 'work function', - 'x-ray absorption near-edge spectra', - 'x-ray absorption spectra', - 'x-ray photoemission spectra', - 'young modulus', - ]; - - /* - * Methods - */ - function is_numeric(n) { - return !isNaN(parseFloat(n)) && isFinite(n); - } - - /* - * Fix chemical formula if needed - */ - function termify_formulae(input, charred) { - if (input.includes('&#')) charred = true; - const re = charred ? /Ȉ(\d);/g : /%u208(\d)/g; - input = charred ? input : escape(input); - const matches = getMatchAll(input, re); - if (matches) { - for (let i = 0; i < matches.length; i++) { - input = input.replace(matches[i][0], matches[i][1]); - } - } - return unescape(input).replace(/^\(|\)$/g, ''); - } - - /* - * User input processing: brute-force similarity check - */ - function is_like_chem_formula(chk) { - //console.log('Checking formula'); - const len = chk.length; - - let checks; - - if (len > 10) return false; - // this cannot be no-index chemical formula - else if (len === 2) { - checks = [[chk.substr(0, 1), chk.substr(1, 1)]]; - } else if (len === 3) { - checks = [ - [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 1)], - [chk.substr(0, 1), chk.substr(1, 2)], - [chk.substr(0, 2), chk.substr(2, 1)], - ]; - } else if (len === 4) { - checks = [ - [chk.substr(0, 2), chk.substr(2, 2)], - [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 1)], - [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 2)], - [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 1)], - [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 1), chk.substr(3, 1)], - ]; - } else if (len === 5) { - checks = [ - [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 1)], - [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 2)], - [chk.substr(0, 2), chk.substr(2, 2), chk.substr(4, 1)], - [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 2)], - [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 1)], - [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 1)], - [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 2)], - ]; - } else { - // 6-9 - checks = [ - // NB too improbable to have 5 one-symbol elements in row - [chk.substr(0, 2), chk.substr(2, 2), chk.substr(4, 2)], // El-El-El - [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 1), chk.substr(4, 2)], // El-E-E-El - [ - chk.substr(0, 2), - chk.substr(2, 1), - chk.substr(3, 1), - chk.substr(4, 1), - chk.substr(5, 1), - ], // El-E-E-E-E - [chk.substr(0, 2), chk.substr(2, 2), chk.substr(4, 1), chk.substr(5, 1)], // El-El-E-E - [chk.substr(0, 2), chk.substr(2, 2), chk.substr(4, 1), chk.substr(5, 2)], // El-El-E-El - [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 2), chk.substr(5, 1)], // El-E-El-E - [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 2), chk.substr(5, 2)], // El-E-El-El - [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 1), chk.substr(4, 2)], // E-El-E-El - [ - chk.substr(0, 1), - chk.substr(1, 2), - chk.substr(3, 1), - chk.substr(4, 1), - chk.substr(5, 1), - ], // E-El-E-E-E - [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 2), chk.substr(5, 1)], // E-El-El-E - [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 2), chk.substr(5, 2)], // E-El-El-El - [ - chk.substr(0, 1), - chk.substr(1, 1), - chk.substr(2, 2), - chk.substr(4, 1), - chk.substr(5, 1), - ], // E-E-El-E-E - [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 2), chk.substr(4, 2)], // E-E-El-El - [ - chk.substr(0, 1), - chk.substr(1, 1), - chk.substr(2, 1), - chk.substr(3, 2), - chk.substr(5, 1), - ], // E-E-E-El-E - [ - chk.substr(0, 1), - chk.substr(1, 1), - chk.substr(2, 1), - chk.substr(3, 1), - chk.substr(4, 2), - ], // E-E-E-E-El - ]; - } - //console.log(checks); - - for (let i = 0; i < checks.length; i++) { - let signals = 0; - for (let j = 0; j < checks[i].length; j++) { - if (periodic_elements_cased.includes(checks[i][j])) signals++; - - if (signals === checks[i].length) { - //console.log(checks[i]); - return true; - } - } - } - return false; - } - - /* - * Detect facets: formulae, elements, lattices, and some classes - */ - function try_uniword_facet(term) { - if (term === 'AB' || term === 'ABC' || term === 'ABCD') return ['formulae']; // special case-sensitive anonymous cases - - term = term.toLowerCase(); - - const maybe_formula = !is_numeric(term.charAt(0)); - - const dmatches = getMatchAll(term, /(\d)/g); - if (dmatches && dmatches.length > 1 && maybe_formula) return ['formulae']; // no props with more than one digit - - const imatches = getMatchAll(escape(term), /%u208(\d)/g); - if (imatches && imatches.length && maybe_formula) return ['formulae']; // no props with subscripts - - if (periodic_elements.includes(term)) return ['elements', capitalize(term)]; - else if (periodic_element_names.includes(term)) - return [ 'elements', capitalize(periodic_elements[periodic_element_names.indexOf(term)]) ]; - - if ( - term.includes('-') && - !term.split('-').some((part) => !periodic_elements.includes(part)) - ) { - return [ 'elements', term.split('-').map((el) => capitalize(el)).join('-') ]; - } - - if (['element', 'elementary', 'unitary'].includes(term)) return ['classes', 'unary']; - else if ( - term === 'quintenary' || - term === 'quinternary' || - term === 'quinternaries' || - term === 'quinaries' || - term === 'pentanary' || - term === 'pentanaries' - ) - return ['classes', 'quinary']; - else if (term === 'actinide' || term === 'actinides') return ['classes', 'actinoid']; - else if ( - term === 'lantanide' || - term === 'lantanides' || - term === 'lanthanide' || - term === 'lanthanides' || - term === 'lantanoid' || - term === 'lantanoids' - ) - return ['classes', 'lanthanoid']; - else if (term.endsWith('ite') && term.length > 4) return ['classes']; - - const chk = term - .replace(' structure', '') - .replace(' lattice', '') - .replace(' crystalline', '') - .replace(' crystal', ''); - if (lat_fgrs.includes(chk)) return ['lattices', lat_i2p[lat_p2i[chk]]]; - - if (term.length <= 9 && dmatches && maybe_formula) return ['formulae']; // no SHORT props with digits (NB L0, E1) - - return false; - } - - /* - * Detect facets: classes, props - */ - function try_multiword_facet(term, queue) { - term = term.toLowerCase(); - - let candidate = false, - combined = false, - orig = false; - - if (queue.length) { - orig = term; - combined = true; - queue.forEach(function ({ input }) { - term = `${input} ${term}`; - }); - } - //console.log("CHECKING TERM FOR MULTI-FACET: "+term); - - candidate = check_category(term, 'classes'); - if (candidate) { - if (combined) candidate.combined = true; - return candidate; - } - - candidate = check_category(term, 'props'); - if (candidate) { - if (combined) candidate.combined = true; - return candidate; - } - - let single_chk; - if (term.endsWith('s')) { - // plural-singular fixups - single_chk = term.substr(0, term.length - 1); - - if (!combined && single_chk.endsWith('ite')) - return { facet: 'classes', input: single_chk, ready: 1 }; - - candidate = check_category(single_chk, 'classes'); - if (candidate) { - if (combined) candidate.combined = true; - return candidate; - } - } - if (term.endsWith('es')) { - // plural-singular fixups - single_chk = term.substr(0, term.length - 2); - - if (single_chk === 'binari') single_chk = 'binary'; - else if (single_chk === 'ternari') single_chk = 'ternary'; - else if (single_chk === 'quaternari') single_chk = 'quaternary'; - - candidate = check_category(single_chk, 'classes'); - if (candidate) { - if (combined) candidate.combined = true; - return candidate; - } - } - - if (!candidate && orig) return try_multiword_facet(orig, []); - return false; - } - - /* - * Utility algo - */ - function check_category(term, category) { - const host = category === 'classes' ? mpds_classes : mpds_props; - - if (host.includes(term)) return { facet: category, input: term, ready: 1 }; - - const len = host.length; - const re = new RegExp(`(?:^|\\s)(${term})(?=\\s|$)`); - let idx; - for (let i = 0; i < len; i++) { - idx = host[i].search(re); - if (idx === 0) { - //console.log("Found unstrict match in "+category+" with <"+host[i]+">"); - return { facet: category, input: term, anew: 1 }; - } - } - return false; - } - - /* - * Chemical formula: ABC3 vs. SrTiO3 - */ - function is_formula_anonymous(formula) { - const detect = new RegExp(/A(\d{0,3})B(\d{0,3})(C(\d{0,3})(D(\d{0,3}))?)?\b/); - return formula.charAt(0) === 'A' && detect.test(formula); - } - - /* - * Get center and ligand information from a string - */ - function parse_ligand(string, start) { - - const center = string.slice(0, start).toLowerCase(); - - if (string.slice(start, start + 1).toLowerCase() === 'x' && string.slice(start, start + 2).toLowerCase() !== 'xe'){ - if (string.slice(start).length === 1) return [center, 'X']; - - return [center, 'X' + string.slice(start + 1)]; - } - - if (string.length === start) return [center, 'X']; - - const remainder = string.slice(start); - - if (is_numeric(remainder.slice(0, 1)) && start === 2) - return parse_ligand(string, 1); - - return [center, capitalize(remainder)]; - } - - /* - * Get center and ligand information from a string - */ - function _parse_aeatoms(string) { - - const pos = string.indexOf('-'); - - if (pos !== -1){ - const center = string.slice(0, pos), - ligand = string.slice(pos + 1); - - if (center.length > 2) return false; - - return parse_ligand(center + ligand, center.length); - } - - const trials = [2, 1], - periodic_elements_xed = ["x"].concat(periodic_elements); - - for (let start = 0; start < 2; start++){ - if (string.length >= trials[start] && periodic_elements_xed.indexOf(string.slice(0, trials[start]).toLowerCase()) !== -1){ - return parse_ligand(string, trials[start]); - } - } - return false; - } - - /** - * Get center and ligand information from a string - */ - function parse_aeatoms(string) { - - const parsed = _parse_aeatoms(string); - - if (!parsed) return ['?', '?']; - - return [ - capitalize(parsed[0]), - formula_to_tags(parsed[1]) - ]; - } - - /* - * Add HTML tags to a chemical formula as a string - */ - function formula_to_tags(string) { - - let sub = false, - html_formula = ''; - - for (let i = 0, len = string.length; i < len; i++){ - if (is_numeric(string[i]) || string[i] === '.'){ - if (!sub){ - html_formula += ''; - sub = true; - } - } else { - if (sub){ - html_formula += ''; - sub = false; - } - } - html_formula += string[i]; - } - if (sub) html_formula += ''; - return html_formula; - } - - /* - * User input processing: main algorithm - */ - function guess(inputstr) { - - // *pseudo_numerics* - /*if (inputstr.includes('c/a ') || inputstr.includes('a/b ') || inputstr.includes('b/c ')) { + /* + * Definitions + */ + const stop_words = [ + 'a', + 'about', + 'above', + 'after', + 'again', + 'against', + 'all', + 'am', + 'an', + 'and', + 'any', + 'are', + "aren't", + 'as', + 'at', + 'be', + 'because', + 'been', + 'before', + 'being', + 'below', + 'between', + 'both', + 'but', + 'by', + "can't", + 'cannot', + 'could', + "couldn't", + 'did', + "didn't", + 'do', + 'does', + "doesn't", + 'doing', + "don't", + 'down', + 'during', + 'each', + 'few', + 'for', + 'from', + 'further', + 'had', + "hadn't", + 'has', + "hasn't", + 'have', + "haven't", + 'having', + 'he', + "he'd", + "he'll", + "he's", + 'her', + 'here', + "here's", + 'hers', + 'herself', + 'him', + 'himself', + 'his', + 'how', + "how's", + 'i', + "i'd", + "i'll", + "i'm", + "i've", + 'if', + 'in', + 'into', + 'is', + "isn't", + 'it', + "it's", + 'its', + 'itself', + "let's", + 'me', + 'more', + 'most', + "mustn't", + 'my', + 'myself', + 'no', + 'nor', + 'not', + 'of', + 'off', + 'on', + 'once', + 'only', + 'or', + 'other', + 'ought', + 'our', + 'ours', + 'ourselves', + 'out', + 'over', + 'own', + 'same', + "shan't", + 'she', + "she'd", + "she'll", + "she's", + 'should', + "shouldn't", + 'so', + 'some', + 'such', + 'than', + 'that', + "that's", + 'the', + 'their', + 'theirs', + 'them', + 'themselves', + 'then', + 'there', + "there's", + 'these', + 'they', + "they'd", + "they'll", + "they're", + "they've", + 'this', + 'those', + 'through', + 'to', + 'too', + 'u', + 'under', + 'until', + 'up', + 'very', + 'was', + "wasn't", + 'we', + "we'd", + "we'll", + "we're", + "we've", + 'were', + "weren't", + 'what', + "what's", + 'when', + "when's", + 'where', + "where's", + 'which', + 'while', + 'who', + "who's", + 'whom', + 'why', + "why's", + 'with', + "won't", + 'would', + "wouldn't", + 'you', + "you'd", + "you'll", + "you're", + "you've", + 'your', + 'yours', + 'yourself', + 'yourselves', + ]; /* exact */ + + const arity_keys = [ + null, + 'unary', + 'binary', + 'ternary', + 'quaternary', + 'quinary', + 'multinary', + 'multinary', + 'multinary', + 'multinary', + 'multinary', + ]; // NB null is for "0-ary" + + const periodic_elements = [ + 'h', + 'he', + 'li', + 'be', + 'b', + 'c', + 'n', + 'o', + 'f', + 'ne', + 'na', + 'mg', + 'al', + 'si', + 'p', + 's', + 'cl', + 'ar', + 'k', + 'ca', + 'sc', + 'ti', + 'v', + 'cr', + 'mn', + 'fe', + 'co', + 'ni', + 'cu', + 'zn', + 'ga', + 'ge', + 'as', + 'se', + 'br', + 'kr', + 'rb', + 'sr', + 'y', + 'zr', + 'nb', + 'mo', + 'tc', + 'ru', + 'rh', + 'pd', + 'ag', + 'cd', + 'in', + 'sn', + 'sb', + 'te', + 'i', + 'xe', + 'cs', + 'ba', + 'la', + 'ce', + 'pr', + 'nd', + 'pm', + 'sm', + 'eu', + 'gd', + 'tb', + 'dy', + 'ho', + 'er', + 'tm', + 'yb', + 'lu', + 'hf', + 'ta', + 'w', + 're', + 'os', + 'ir', + 'pt', + 'au', + 'hg', + 'tl', + 'pb', + 'bi', + 'po', + 'at', + 'rn', + 'fr', + 'ra', + 'ac', + 'th', + 'pa', + 'u', + 'np', + 'pu', + 'am', + 'cm', + 'bk', + 'cf', + 'es', + 'fm', + 'md', + 'no', + 'lr', + 'rf', + 'db', + 'sg', + 'bh', + 'hs', + 'mt', + 'ds', + 'rg', + 'cn', + 'nh', + 'fl', + 'mc', + 'lv', + 'ts', + 'og', + ]; /* exact */ + + const periodic_elements_cased = periodic_elements.map(function (x) { + return capitalize(x); + }); + + const periodic_element_names = [ + 'hydrogen', + 'helium', + 'lithium', + 'beryllium', + 'boron', + 'carbon', + 'nitrogen', + 'oxygen', + 'fluorine', + 'neon', + 'sodium', + 'magnesium', + 'aluminium', + 'silicon', + 'phosphorus', + 'sulfur', + 'chlorine', + 'argon', + 'potassium', + 'calcium', + 'scandium', + 'titanium', + 'vanadium', + 'chromium', + 'manganese', + 'iron', + 'cobalt', + 'nickel', + 'copper', + 'zinc', + 'gallium', + 'germanium', + 'arsenic', + 'selenium', + 'bromine', + 'krypton', + 'rubidium', + 'strontium', + 'yttrium', + 'zirconium', + 'niobium', + 'molybdenum', + 'technetium', + 'ruthenium', + 'rhodium', + 'palladium', + 'silver', + 'cadmium', + 'indium', + 'tin', + 'antimony', + 'tellurium', + 'iodine', + 'xenon', + 'caesium', + 'barium', + 'lanthanum', + 'cerium', + 'praseodymium', + 'neodymium', + 'promethium', + 'samarium', + 'europium', + 'gadolinium', + 'terbium', + 'dysprosium', + 'holmium', + 'erbium', + 'thulium', + 'ytterbium', + 'lutetium', + 'hafnium', + 'tantalum', + 'tungsten', + 'rhenium', + 'osmium', + 'iridium', + 'platinum', + 'gold', + 'mercury', + 'thallium', + 'lead', + 'bismuth', + 'polonium', + 'astatine', + 'radon', + 'francium', + 'radium', + 'actinium', + 'thorium', + 'protactinium', + 'uranium', + 'neptunium', + 'plutonium', + 'americium', + 'curium', + 'berkelium', + 'californium', + 'einsteinium', + 'fermium', + 'mendelevium', + 'nobelium', + 'lawrencium', + 'rutherfordium', + 'dubnium', + 'seaborgium', + 'bohrium', + 'hassium', + 'meitnerium', + 'darmstadium', + 'roentgenium', + 'copernicium', + 'nihonium', + 'flerovium', + 'moscovium', + 'livermorium', + 'tennessine', + 'oganesson', + ]; /* fuzzy */ + + const lat_p2i = { + cubic: 1, + hexagonal: 2, + trigonal: 3, + tetragonal: 4, + orthorhombic: 5, + monoclinic: 6, + triclinic: 7, + rhombohedral: 3, + cub: 1, + hex: 2, + hexag: 2, + trig: 3, + tet: 4, + tetr: 4, + tetrag: 4, + orth: 5, + ortho: 5, + monocl: 6, + tric: 7, + tricl: 7, + rhom: 3, + rhomb: 3, + }; + const lat_fgrs = Object.keys(lat_p2i); + const lat_i2p = { + 1: 'cubic', + 2: 'hexagonal', + 3: 'trigonal', + 4: 'tetragonal', + 5: 'orthorhombic', + 6: 'monoclinic', + 7: 'triclinic', + }; + + const mpds_classes = [ + 'ab initio calculations', + 'ab initio literature', + 'actinoid', + 'adamantane', + 'aegirine', + 'alkali', + 'alkaline', + 'allargentum', + 'almandine', + 'alum', + 'alunogen', + 'amide', + 'analcime', + 'anatase', + 'anorpiment', + 'anorthoclase', + 'antiferroelectric', + 'antiferromagnet', + 'antiferromagnetic', + 'arsenate', + 'arsenide', + 'ashcroftine', + 'auricupride', + 'aurocupride', + 'azide', + 'baileychlore', + 'bariopyrochlore', + 'baryte', + 'beryl', + 'beta-alumina', + 'beta-boron', + 'biguanide', + 'binary', + 'birefringent', + 'borane', + 'borate', + 'borax', + 'boride', + 'borocarbide', + 'borohydride', + 'boronitride', + 'botryogen', + 'bromanilate', + 'bromide', + 'bromoimide', + 'calomel', + 'carbamate', + 'carbide', + 'carbonate', + 'carbonyl', + 'carboxylate', + 'celestine', + 'cell and atoms', + 'cell-only', + 'celsian', + 'cesiokenopyrochlore', + 'chalcogen', + 'charge-density wave state', + 'chevrel', + 'chimney-ladder', + 'chloranilate', + 'chlorate', + 'chloride', + 'chloritoid', + 'chlorosulfate', + 'chromate', + 'chrysoberyl', + 'chrysotile', + 'cinnabar', + 'clathrate', + 'clinochlore', + 'clinoclase', + 'clodronate', + 'close-packed', + 'cluster glass', + 'colossal magnetoresistance', + 'conductor', + 'corundum', + 'cosmochlor', + 'croconate violet', + 'croconate', + 'cryptomelane', + 'cuprate', + 'cuspidine', + 'cyamelurate', + 'cyanamide', + 'cyanamidonitrate', + 'cyananilate', + 'cyanide', + 'cyanotetrazolate', + 'cyanoureate', + 'cyanurate', + 'cyprine', + 'davyne', + 'deuteride', + 'deuterium', + 'devilline', + 'diamagnetic', + 'diamond', + 'diarsenate', + 'diaspore', + 'diazanide', + 'diazenide', + 'dichromate', + 'digermanate', + 'diiodobromide', + 'dinitramide', + 'diopside', + 'dioptase', + 'dioxobromate', + 'dioxoiodate', + 'dioxosulfate', + 'dioxothiosulfate', + 'diphosphate', + 'diphosphonate', + 'dipolyhedral', + 'diselenate', + 'disilicate', + 'disordered', + 'disulfate', + 'dithiocarbamate', + 'dithiocarbonate', + 'dithionate', + 'dithiooxalate', + 'dithiophosphate', + 'dithiosquarate', + 'divanadate', + 'epidote', + 'euchlorine', + 'euclase', + 'eudialyte', + 'eulytine', + 'fermi liquid', + 'feroxihyte', + 'feroxyhyte', + 'ferrielectric', + 'ferrimagnet', + 'ferroelastic', + 'ferroelectric', + 'ferromagnet', + 'ferromagnetic', + 'fluor-schorl', + 'fluoride', + 'fluoroborate', + 'frank-kasper', + 'friauf-laves', + 'fulleride', + 'fulminate', + 'galena', + 'gamma-brass', + 'garnet', + 'giant magnetocaloric effect', + 'gismondine', + 'glaucodot', + 'glaucophane', + 'grossular', + 'guanidinate', + 'gypsum', + 'hafnon', + 'half metal', + 'halogen', + 'hard magnet', + 'harmotome', + 'haueyne', + 'heavy fermion', + 'hedyphane', + 'helimagnet', + 'helvine', + 'hexasulfate', + 'hexathionate', + 'host-guest', + 'humboldtine', + 'hydrate', + 'hydride', + 'hydroxide', + 'hypercinnabar', + 'hypophosphate', + 'ice', + 'imide', + 'iminate', + 'intercalation', + 'intermediate valence', + 'intermetallic', + 'iodate', + 'iodide', + 'ionic conductor', + 'iridium', + 'isoferroplatinum', + 'isopolyhedral', + 'isothermal section', + 'kornerupine', + 'kosmochlor', + 'lanthanoid', + 'lavendulan', + 'levyne', + 'lime', + 'liquidus projection', + 'litharge', + 'lithiophosphate', + 'luminescent', + 'machine learning', + 'machine-learning', + 'magnesiochloritoid', + 'magnetoelastic', + 'magnetoelectric', + 'manganate', + 'massicot', + 'mellitate', + 'melonate', + 'metacinnabar', + 'metal', + 'metalloid', + 'metamagnet', + 'metavoltine', + 'mica', + 'microcline', + 'microline', + 'mictomagnet', + 'minium', + 'molybdate', + 'multiferroic', + 'multinary', + 'nasicon', + 'natron', + 'natrophosphate', + 'natroxalate', + 'negative thermal expansion', + 'nepheline', + 'nickeline', + 'niobocarbide', + 'niter', + 'nitranilate', + 'nitrate', + 'nitratine', + 'nitride', + 'nitroformate', + 'noble gas', + 'non disordered', + 'non-disordered', + 'non-linear optics', + 'nonaflate', + 'nonmetal', + 'nordenskioeldine', + 'nosean', + 'olivine', + 'optically isotropic', + 'organic', + 'orpiment', + 'orthoborate', + 'orthoclase', + 'orthogermanate', + 'orthonitrate', + 'orthophosphate', + 'orthosilicate', + 'oxalate', + 'oxamate', + 'oxide', + 'oxoiodate', + 'oxonitrate', + 'oxotetrazolate', + 'oxy-schorl', + 'ozonide', + 'palladogermanide', + 'paracelsian', + 'paramagnet', + 'pararealgar', + 'pauli paramagnet', + 'peer review', + 'peer reviewed', + 'peer-review', + 'peer-reviewed', + 'perchlorate', + 'perhydrate', + 'periclase', + 'periodate', + 'permanganate', + 'pernitride', + 'peroxide', + 'pertechnetate', + 'phosphate', + 'phosphide', + 'phosphinate', + 'phosphonate', + 'photocatalyst', + 'photovoltaic effect', + 'photovoltaic', + 'piezoelectric', + 'pnictogen', + 'polaron conductor', + 'polycrase', + 'potassic', + 'prism', + 'prussian blue', + 'pseudorutile', + 'pyrochlore', + 'pyroelectric', + 'pyrope', + 'quadridavyne', + 'quartz', + 'quasicrystal', + 'quaternary', + 'quinary', + 'radioactive', + 'rare earth', + 'realgar', + 'refractory', + 'relaxor', + 'retzian', + 'rhodarsenide', + 'rhodizonate', + 'rhomboclase', + 'rocksalt', + 'rubicline', + 'ruddlesden-popper', + 'rutheniridosmine', + 'rutherfordine', + 'rutile', + 'salammoniac', + 'sanidine', + 'sapphirine', + 'sarcopside', + 'schorl', + 'selenide', + 'selenidel', + 'semiconductor', + 'semimetal', + 'shape memory effect', + 'shape memory', + 'siderotil', + 'silanide', + 'silicate', + 'silicide', + 'sillen-aurivillius', + 'skyrmion', + 'solidus projection', + 'solvus projection', + 'spessartine', + 'spin glass', + 'spinel', + 'spodumene', + 'squarate', + 'steenstrupine', + 'stibarsen', + 'subsolidus relations', + 'sulfamate', + 'sulfamide', + 'sulfate', + 'sulfide', + 'sulfidel', + 'sulfinylamide', + 'superconductor', + 'superhard', + 'superionic conductor', + 'superoxide', + 'talc', + 'tantalcarbide', + 'tellurantimony', + 'telluride', + 'ternary', + 'tetrahedron', + 'thermoelectric', + 'thiocyanate', + 'thiocyanurate', + 'thiophosphate', + 'thiosulfate', + 'topaz', + 'topological insulator', + 'tourmaline', + 'transitional', + 'triflate', + 'tripolyhedral', + 'triteride', + 'trithionate', + 'tritide', + 'tritium', + 'trona', + 'tungstate', + 'turquoise', + 'ulvoespinel', + 'unary', + 'uranophane', + 'uranopolycrase', + 'urate', + 'urea', + 'ureate', + 'van vleck paramagnet', + 'vanadate', + 'vertical section', + 'violurate', + 'xenotime', + 'zincobotryogen', + 'zircon', + 'zircosulfate', + ]; + + // Mapping of common element groups / periods to element symbol arrays + const ELEMENT_GROUPS_MAP = { + 'period 1': ['H', 'He'], + 'period 2': ['Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne'], + 'period 3': ['Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar'], + 'period 4': [ + 'K', + 'Ca', + 'Sc', + 'Ti', + 'V', + 'Cr', + 'Mn', + 'Fe', + 'Co', + 'Ni', + 'Cu', + 'Zn', + 'Ga', + 'Ge', + 'As', + 'Se', + 'Br', + 'Kr', + ], + 'period 5': [ + 'Rb', + 'Sr', + 'Y', + 'Zr', + 'Nb', + 'Mo', + 'Tc', + 'Ru', + 'Rh', + 'Pd', + 'Ag', + 'Cd', + 'In', + 'Sn', + 'Sb', + 'Te', + 'I', + 'Xe', + ], + 'period 6': [ + 'Cs', + 'Ba', + 'Lu', + 'Hf', + 'Ta', + 'W', + 'Re', + 'Os', + 'Ir', + 'Pt', + 'Au', + 'Hg', + 'Tl', + 'Pb', + 'Bi', + 'Po', + 'At', + 'Rn', + ], + 'period 7': ['Fr', 'Ra', 'Lr'], + + lanthanide: [ + 'La', + 'Ce', + 'Pr', + 'Nd', + 'Pm', + 'Sm', + 'Eu', + 'Gd', + 'Tb', + 'Dy', + 'Ho', + 'Er', + 'Tm', + 'Yb', + ], + actinide: [ + 'Ac', + 'Th', + 'Pa', + 'U', + 'Np', + 'Pu', + 'Am', + 'Cm', + 'Bk', + 'Cf', + 'Es', + 'Fm', + 'Md', + 'No', + ], + + alkali: ['H', 'Li', 'Na', 'K', 'Rb', 'Cs', 'Fr'], + alkaline: ['Be', 'Mg', 'Ca', 'Sr', 'Ba', 'Ra'], + + 'group 3': ['Sc', 'Y', 'Lu', 'Lr'], + 'group 4': ['Ti', 'Zr', 'Hf'], + 'group 5': ['V', 'Nb', 'Ta'], + 'group 6': ['Cr', 'Mo', 'W'], + 'group 7': ['Mn', 'Tc', 'Re'], + 'group 8': ['Fe', 'Ru', 'Os'], + 'group 9': ['Co', 'Rh', 'Ir'], + 'group 10': ['Ni', 'Pd', 'Pt'], + 'group 11': ['Cu', 'Ag', 'Au', 'Rg'], + 'group 12': ['Zn', 'Cd', 'Hg'], + + triels: ['B', 'Al', 'Ga', 'In', 'Tl'], + triel: ['B', 'Al', 'Ga', 'In', 'Tl'], + tetrels: ['C', 'Si', 'Ge', 'Sn', 'Pb', 'Fl'], + tetrel: ['C', 'Si', 'Ge', 'Sn', 'Pb', 'Fl'], + pnictogen: ['N', 'P', 'As', 'Sb', 'Bi'], + chalcogen: ['O', 'S', 'Se', 'Te', 'Po'], + halogen: ['F', 'Cl', 'Br', 'I', 'At'], + 'noble gas': ['He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn'], + }; + + function getGroupElements(name) { + if (!name) return null; + const key = name.toLowerCase().trim(); + // normalize simple plural forms + if (ELEMENT_GROUPS_MAP[key]) return ELEMENT_GROUPS_MAP[key]; + const singular = key.replace(/s$/i, ''); + if (ELEMENT_GROUPS_MAP[singular]) return ELEMENT_GROUPS_MAP[singular]; + return null; + } + + const mpds_props = [ + 'acceptor concentration', + 'acceptor to donor concentration', + 'activation energy', + 'adiabatic bulk modulus', + 'angle-resolved photoelectron spectra', + 'atomic structure', + 'band gap', + 'birefringence', + 'bremsstrahlung isochromat spectra', + 'charge carrier concentration', + 'charge carrier mobility', + 'charge transfer', + 'charge-density wave', + 'charge-transfer energy', + 'coefficient of schottky term in heat capacity', + 'coercive electric field', + 'coercive field', + 'coherence length', + 'cohesive energy', + 'compressibility', + 'conductivity', + 'core-electron contribution to magnetic susceptibility', + 'critical current density', + 'critical magnetic field', + 'crystal electric field parameter', + 'crystal electric field parameters', + 'crystal electric field splitting', + 'crystal electric field', + 'crystal field level', + 'crystalline structure', + 'crystal cell', + 'crystal structure', + 'curie coefficient', + 'curie temperature', + 'curie-weiss paramagnetism', + 'debye temperature', + 'decomposition temperature', + 'decomposition', + 'diamagnetic contribution to magnetic susceptibility', + 'dielectric constant', + 'dielectric loss tangent', + 'diffusion', + 'donor concentration', + 'donor energy', + 'effective charge', + 'effective electron number', + 'effective mass of electrons to holes ratio', + 'effective mass of electrons', + 'effective mass', + 'einstein temperature', + 'elastic compliance', + 'elastic moduli', + 'elastic stiffness coefficient', + 'elasticity', + 'electric field gradient', + 'electric polarization', + 'electrical conductivity', + 'electric properties', + 'electrical properties', + 'electrical resistivity', + 'electrochemical impedance spectroscopy', + 'electron density maps', + 'electron density of states at fermi level', + 'electron density of states', + 'electron energy band structure', + 'electron energy loss spectra', + 'electron grueneisen coefficient', + 'electron mobility', + 'electron paramagnetic resonance spectra', + 'electron spin resonance spectra', + 'electron-phonon interaction parameter', + 'electronic contribution to heat capacity', + 'electronic contribution to thermal conductivity', + 'electronic energy gap', + 'electronic heat capacity coefficient', + 'electronic properties', + 'energy at fermi level', + 'energy band structure', + 'energy gap for direct transition', + 'energy gap for indirect transition', + 'energy level diagram', + 'energy of optical phonon', + 'energy product', + 'energy', + 'enthalpy change at melting point', + 'enthalpy change at phase transition', + 'enthalpy change at structural transition', + 'enthalpy change', + 'enthalpy of formation', + 'enthalpy of reaction', + 'enthalpy', + 'entropy change at melting point', + 'entropy change at phase transition', + 'entropy of formation', + 'entropy of reaction', + 'entropy', + 'eutectoid decomposition', + 'exchange field', + 'exchange interaction parameter', + 'exciton energy', + 'extended x-ray absorption fine structure', + 'extraordinary refractive index', + 'fermi energy', + 'fermi surface', + 'ferroelasticity', + 'ferroelectric curie temperature', + 'ferroelectric hysteresis', + 'ferroelectric neel temperature', + 'ferroelectric transitions', + 'field dependence of resistivity', + 'figure of merit', + 'freezing temperature for spin glass', + 'fusion', + 'gibbs energy change', + 'gibbs energy of formation', + 'gibbs energy of reaction', + 'ginzburg-landau parameter', + 'grueneisen coefficient', + 'gruneisen coefficient', + 'hall coefficient', + 'hall effect', + 'hall mobility', + 'hardness', + 'heat capacity at constant pressure', + 'heat capacity at constant volume', + 'heat capacity coefficient', + 'heat capacity discontinuity at structural transition', + 'heat capacity discontinuity at superconducting transition', + 'heat capacity discontinuity', + 'heat capacity', + 'high-frequency permittivity', + 'hole mobility', + 'hydrogen diffusion', + 'hyperfine magnetic field', + 'imaginary part of magnetic susceptibility', + 'imaginary part of permittivity', + 'inelastic neutron scattering', + 'inelastic x-ray scattering', + 'infrared spectra', + 'ionic conductivity', + 'irreversibility field', + 'isomer shift', + 'isothermal bulk modulus', + 'isothermal linear compressibility', + 'isothermal volume compressibility', + 'knoop hardness', + 'kondo behavior', + 'kondo temperature', + 'lattice', + 'linear magnetostriction', + 'linear thermal expansion coefficient', + 'longitudinal sound velocity', + 'longitudinal-mode elastic coefficient', + 'lorentz number', + 'lower critical magnetic field', + 'lowest temperature of investigation', + 'luminescence lifetime', + 'luminescence', + 'magnetic anisotropy field', + 'magnetic anisotropy', + 'magnetic circular x-ray dichroism', + 'magnetic dichroism', + 'magnetic direction', + 'magnetic entropy', + 'magnetic field for magnetic transition', + 'magnetic field for structural transition', + 'magnetic heat capacity', + 'magnetic hysteresis', + 'magnetic moment', + 'magnetic order', + 'magnetic penetration depth', + 'magnetic phase diagram', + 'magnetic properties', + 'magnetic resistivity', + 'magnetic structure', + 'magnetic susceptibility', + 'magnetic transitions', + 'magnetism', + 'magnetization', + 'magneto-optical effects', + 'magneto-optical kerr effect', + 'magnetostriction', + 'mechanical properties', + 'melting temperature', + 'microhardness', + 'moessbauer spectra', + 'mohs hardness', + 'molar volume', + 'molecular field parameter', + 'muon spin spectra', + 'neel temperature', + 'neutron energy loss spectra', + 'non-linear optical properties', + 'non-linear optics', + 'nuclear magnetic resonance spectra', + 'nuclear quadrupolar resonance spectra', + 'optical absorption coefficient', + 'optical absorption', + 'optical conductivity', + 'optical phonons', + 'optical properties', + 'optical spectra', + 'orbital magnetic moment', + 'ordinary refractive index', + 'paraelectric curie coefficient', + 'paraelectric curie temperature', + 'paraelectric state', + 'paramagnetic curie temperature', + 'paramagnetic moment', + 'pauli magnetic susceptibility', + 'peritectic formation', + 'peritectoid formation', + 'permittivity', + 'perturbed angular correlation', + 'phase diagram', + 'phase diagrams', + 'phase transitions', + 'phonon contribution to thermal conductivity', + 'phonon density of states', + 'phonon dispersion', + 'phonon grueneisen coefficient', + 'phonon heat capacity at constant pressure', + 'phonons', + 'photo-conductivity data', + 'photo-conductivity', + 'photoelectron emission spectra', + 'photoluminescence spectra', + 'physical properties', + 'piezoelectric coefficient', + 'piezoelectric coefficient', + 'piezoelectricity', + 'plasma edge', + 'poisson ratio', + 'power factor', + 'pressure derivative of adiabatic bulk modulus', + 'pressure derivative of curie temperature', + 'pressure derivative of elastic stiffness coefficient', + 'pressure derivative of energy gap', + 'pressure derivative of isothermal bulk modulus', + 'pressure derivative of neel temperature', + 'pressure derivative of superconducting transition temperature', + 'pressure derivative of transition temperature', + 'pressure for magnetic transition', + 'pressure for metal-nonmetal transition', + 'pressure for structural transition', + 'pyroelectric coefficient', + 'pyroelectricity', + 'quadrupole splitting', + 'raman spectra', + 'real part of magnetic permeability', + 'real part of magnetic susceptibility', + 'real part of optical conductivity', + 'real part of permittivity', + 'reflectivity', + 'refractive index', + 'relative cooling power', + 'remanent induction', + 'remanent magnetic field', + 'remanent magnetic moment', + 'remanent magnetization', + 'remanent polarization', + 'residual resistivity ratio', + 'residual resistivity', + 'resistivity anisotropy', + 'resistivity', + 'resonance spectra', + 'saturation magnetic moment', + 'saturation magnetization', + 'second-harmonic generation', + 'seebeck coefficient', + 'shear modulus', + 'soft-x-ray emission spectra', + 'sound velocity', + 'spin contribution to magnetic susceptibility', + 'spin magnetic moment', + 'spin-fluctuation temperature', + 'spin-fluctuation', + 'spin-orbit splitting of valence band', + 'spin-resolved electron density of states at fermi level', + 'spontaneous elastic strain', + 'spontaneous magnetic moment', + 'spontaneous magnetization', + 'spontaneous polarization', + 'static permittivity', + 'stoner enhancement factor', + 'stoner parameter', + 'stoner product', + 'structural properties', + 'structural transition', + 'structural transitions', + 'superconducting transition temperature', + 'superconductivity energy gap', + 'superconductivity phenomena', + 'superconductivity', + 'temperature dependence of resistivity', + 'temperature dependence of static permittivity', + 'temperature derivative of elastic stiffness coefficient', + 'temperature derivative of energy gap', + 'temperature derivative of resistivity', + 'temperature derivative of upper critical magnetic field', + 'temperature for congruent melting', + 'temperature for eutectoid decomposition', + 'temperature for ferroelectric reordering', + 'temperature for magnetic transition', + 'temperature for metal-nonmetal transition', + 'temperature for peritectic formation', + 'temperature for peritectoid formation', + 'temperature for structural transition', + 'temperature-independent part of magnetic susceptibility', + 'thermal cell parameters change', + 'thermal conductivity', + 'thermal energy gap', + 'thermal expansion', + 'thermal properties', + 'thermal strain', + 'thermodynamic properties', + 'thermodynamics', + 'thermoelectric figure of merit', + 'thermoelectric power', + 'total energy calculation data', + 'transmittance', + 'transverse sound velocity', + 'type of magnetism', + 'upper critical magnetic field', + 'vacuum ultraviolet photoemission spectra', + 'valence', + 'van vleck contribution to magnetic susceptibility', + 'vibrational spectra', + 'vickers hardness number', + 'volume change at phase transition', + 'volume change at structural transition', + 'volume change', + 'volume magnetostriction', + 'volume thermal expansion coefficient', + 'wavelength for luminescence', + 'wavenumber of longitudinal optical phonon', + 'wavenumber of optical phonon', + 'wavenumber of transverse optical phonon', + 'work function', + 'x-ray absorption near-edge spectra', + 'x-ray absorption spectra', + 'x-ray photoemission spectra', + 'young modulus', + ]; + + /* + * Methods + */ + function is_numeric(n) { + return !isNaN(parseFloat(n)) && isFinite(n); + } + + /* + * Fix chemical formula if needed + */ + function termify_formulae(input, charred) { + if (input.includes('&#')) charred = true; + const re = charred ? /Ȉ(\d);/g : /%u208(\d)/g; + input = charred ? input : escape(input); + const matches = getMatchAll(input, re); + if (matches) { + for (let i = 0; i < matches.length; i++) { + input = input.replace(matches[i][0], matches[i][1]); + } + } + return unescape(input).replace(/^\(|\)$/g, ''); + } + + /* + * User input processing: brute-force similarity check + */ + function is_like_chem_formula(chk) { + //console.log('Checking formula'); + const len = chk.length; + + let checks; + + if (len > 10) return false; + // this cannot be no-index chemical formula + else if (len === 2) { + checks = [[chk.substr(0, 1), chk.substr(1, 1)]]; + } else if (len === 3) { + checks = [ + [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 1)], + [chk.substr(0, 1), chk.substr(1, 2)], + [chk.substr(0, 2), chk.substr(2, 1)], + ]; + } else if (len === 4) { + checks = [ + [chk.substr(0, 2), chk.substr(2, 2)], + [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 1)], + [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 2)], + [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 1)], + [ + chk.substr(0, 1), + chk.substr(1, 1), + chk.substr(2, 1), + chk.substr(3, 1), + ], + ]; + } else if (len === 5) { + checks = [ + [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 1)], + [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 2)], + [chk.substr(0, 2), chk.substr(2, 2), chk.substr(4, 1)], + [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 2)], + [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 1)], + [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 1)], + [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 2)], + ]; + } else { + // 6-9 + checks = [ + // NB too improbable to have 5 one-symbol elements in row + [chk.substr(0, 2), chk.substr(2, 2), chk.substr(4, 2)], // El-El-El + [ + chk.substr(0, 2), + chk.substr(2, 1), + chk.substr(3, 1), + chk.substr(4, 2), + ], // El-E-E-El + [ + chk.substr(0, 2), + chk.substr(2, 1), + chk.substr(3, 1), + chk.substr(4, 1), + chk.substr(5, 1), + ], // El-E-E-E-E + [ + chk.substr(0, 2), + chk.substr(2, 2), + chk.substr(4, 1), + chk.substr(5, 1), + ], // El-El-E-E + [ + chk.substr(0, 2), + chk.substr(2, 2), + chk.substr(4, 1), + chk.substr(5, 2), + ], // El-El-E-El + [ + chk.substr(0, 2), + chk.substr(2, 1), + chk.substr(3, 2), + chk.substr(5, 1), + ], // El-E-El-E + [ + chk.substr(0, 2), + chk.substr(2, 1), + chk.substr(3, 2), + chk.substr(5, 2), + ], // El-E-El-El + [ + chk.substr(0, 1), + chk.substr(1, 2), + chk.substr(3, 1), + chk.substr(4, 2), + ], // E-El-E-El + [ + chk.substr(0, 1), + chk.substr(1, 2), + chk.substr(3, 1), + chk.substr(4, 1), + chk.substr(5, 1), + ], // E-El-E-E-E + [ + chk.substr(0, 1), + chk.substr(1, 2), + chk.substr(3, 2), + chk.substr(5, 1), + ], // E-El-El-E + [ + chk.substr(0, 1), + chk.substr(1, 2), + chk.substr(3, 2), + chk.substr(5, 2), + ], // E-El-El-El + [ + chk.substr(0, 1), + chk.substr(1, 1), + chk.substr(2, 2), + chk.substr(4, 1), + chk.substr(5, 1), + ], // E-E-El-E-E + [ + chk.substr(0, 1), + chk.substr(1, 1), + chk.substr(2, 2), + chk.substr(4, 2), + ], // E-E-El-El + [ + chk.substr(0, 1), + chk.substr(1, 1), + chk.substr(2, 1), + chk.substr(3, 2), + chk.substr(5, 1), + ], // E-E-E-El-E + [ + chk.substr(0, 1), + chk.substr(1, 1), + chk.substr(2, 1), + chk.substr(3, 1), + chk.substr(4, 2), + ], // E-E-E-E-El + ]; + } + //console.log(checks); + + for (let i = 0; i < checks.length; i++) { + let signals = 0; + for (let j = 0; j < checks[i].length; j++) { + if (periodic_elements_cased.includes(checks[i][j])) signals++; + + if (signals === checks[i].length) { + //console.log(checks[i]); + return true; + } + } + } + return false; + } + + /* + * Detect facets: formulae, elements, lattices, and some classes + */ + function try_uniword_facet(term) { + if (term === 'AB' || term === 'ABC' || term === 'ABCD') return ['formulae']; // special case-sensitive anonymous cases + + term = term.toLowerCase(); + + const maybe_formula = !is_numeric(term.charAt(0)); + + const dmatches = getMatchAll(term, /(\d)/g); + if (dmatches && dmatches.length > 1 && maybe_formula) return ['formulae']; // no props with more than one digit + + const imatches = getMatchAll(escape(term), /%u208(\d)/g); + if (imatches && imatches.length && maybe_formula) return ['formulae']; // no props with subscripts + + if (periodic_elements.includes(term)) return ['elements', capitalize(term)]; + else if (periodic_element_names.includes(term)) + return [ + 'elements', + capitalize(periodic_elements[periodic_element_names.indexOf(term)]), + ]; + + if ( + term.includes('-') && + !term.split('-').some((part) => !periodic_elements.includes(part)) + ) { + return [ + 'elements', + term + .split('-') + .map((el) => capitalize(el)) + .join('-'), + ]; + } + + if (['element', 'elementary', 'unitary'].includes(term)) + return ['classes', 'unary']; + else if ( + term === 'quintenary' || + term === 'quinternary' || + term === 'quinternaries' || + term === 'quinaries' || + term === 'pentanary' || + term === 'pentanaries' + ) + return ['classes', 'quinary']; + else if (term === 'actinide' || term === 'actinides') + return ['classes', 'actinoid']; + else if ( + term === 'lantanide' || + term === 'lantanides' || + term === 'lanthanide' || + term === 'lanthanides' || + term === 'lantanoid' || + term === 'lantanoids' + ) + return ['classes', 'lanthanoid']; + else if (term.endsWith('ite') && term.length > 4) return ['classes']; + + // direct element-group single-word matches (tetrels, triels, chalcogen, etc.) + const groupEls = getGroupElements(term); + if (groupEls) return ['elements', groupEls.join('-')]; + + const chk = term + .replace(' structure', '') + .replace(' lattice', '') + .replace(' crystalline', '') + .replace(' crystal', ''); + if (lat_fgrs.includes(chk)) return ['lattices', lat_i2p[lat_p2i[chk]]]; + + if (term.length <= 9 && dmatches && maybe_formula) return ['formulae']; // no SHORT props with digits (NB L0, E1) + + return false; + } + + /* + * Detect facets: classes, props + */ + function try_multiword_facet(term, queue) { + term = term.toLowerCase(); + + let candidate = false, + combined = false, + orig = false; + + if (queue.length) { + orig = term; + combined = true; + queue.forEach(function ({ input }) { + term = `${input} ${term}`; + }); + } + //console.log("CHECKING TERM FOR MULTI-FACET: "+term); + + // Special handling: phrases like 'period 2' or 'group 11' + const pgMatch = term.match(/^(period|group)\s+(\d{1,2})$/); + if (pgMatch) { + const kind = pgMatch[1]; + const num = pgMatch[2]; + const key = `${kind} ${num}`; + const els = getGroupElements(key); + if (els) return { facet: 'elements', input: els.join('-'), ready: 1 }; + } + + candidate = check_category(term, 'classes'); + if (candidate) { + if (combined) candidate.combined = true; + return candidate; + } + + candidate = check_category(term, 'props'); + if (candidate) { + if (combined) candidate.combined = true; + return candidate; + } + + let single_chk; + if (term.endsWith('s')) { + // plural-singular fixups + single_chk = term.substr(0, term.length - 1); + + if (!combined && single_chk.endsWith('ite')) + return { facet: 'classes', input: single_chk, ready: 1 }; + + candidate = check_category(single_chk, 'classes'); + if (candidate) { + if (combined) candidate.combined = true; + return candidate; + } + } + if (term.endsWith('es')) { + // plural-singular fixups + single_chk = term.substr(0, term.length - 2); + + if (single_chk === 'binari') single_chk = 'binary'; + else if (single_chk === 'ternari') single_chk = 'ternary'; + else if (single_chk === 'quaternari') single_chk = 'quaternary'; + + candidate = check_category(single_chk, 'classes'); + if (candidate) { + if (combined) candidate.combined = true; + return candidate; + } + } + + if (!candidate && orig) return try_multiword_facet(orig, []); + return false; + } + + /* + * Utility algo + */ + function check_category(term, category) { + const host = category === 'classes' ? mpds_classes : mpds_props; + + if (host.includes(term)) return { facet: category, input: term, ready: 1 }; + + const len = host.length; + const re = new RegExp(`(?:^|\\s)(${term})(?=\\s|$)`); + let idx; + for (let i = 0; i < len; i++) { + idx = host[i].search(re); + if (idx === 0) { + //console.log("Found unstrict match in "+category+" with <"+host[i]+">"); + return { facet: category, input: term, anew: 1 }; + } + } + return false; + } + + /* + * Chemical formula: ABC3 vs. SrTiO3 + */ + function is_formula_anonymous(formula) { + const detect = new RegExp( + /A(\d{0,3})B(\d{0,3})(C(\d{0,3})(D(\d{0,3}))?)?\b/ + ); + return formula.charAt(0) === 'A' && detect.test(formula); + } + + /* + * Get center and ligand information from a string + */ + function parse_ligand(string, start) { + const center = string.slice(0, start).toLowerCase(); + + if ( + string.slice(start, start + 1).toLowerCase() === 'x' && + string.slice(start, start + 2).toLowerCase() !== 'xe' + ) { + if (string.slice(start).length === 1) return [center, 'X']; + + return [center, 'X' + string.slice(start + 1)]; + } + + if (string.length === start) return [center, 'X']; + + const remainder = string.slice(start); + + if (is_numeric(remainder.slice(0, 1)) && start === 2) + return parse_ligand(string, 1); + + return [center, capitalize(remainder)]; + } + + /* + * Get center and ligand information from a string + */ + function _parse_aeatoms(string) { + const pos = string.indexOf('-'); + + if (pos !== -1) { + const center = string.slice(0, pos), + ligand = string.slice(pos + 1); + + if (center.length > 2) return false; + + return parse_ligand(center + ligand, center.length); + } + + const trials = [2, 1], + periodic_elements_xed = ['x'].concat(periodic_elements); + + for (let start = 0; start < 2; start++) { + if ( + string.length >= trials[start] && + periodic_elements_xed.indexOf( + string.slice(0, trials[start]).toLowerCase() + ) !== -1 + ) { + return parse_ligand(string, trials[start]); + } + } + return false; + } + + /** + * Get center and ligand information from a string + */ + function parse_aeatoms(string) { + const parsed = _parse_aeatoms(string); + + if (!parsed) return ['?', '?']; + + return [capitalize(parsed[0]), formula_to_tags(parsed[1])]; + } + + /* + * Add HTML tags to a chemical formula as a string + */ + function formula_to_tags(string) { + let sub = false, + html_formula = ''; + + for (let i = 0, len = string.length; i < len; i++) { + if (is_numeric(string[i]) || string[i] === '.') { + if (!sub) { + html_formula += ''; + sub = true; + } + } else { + if (sub) { + html_formula += ''; + sub = false; + } + } + html_formula += string[i]; + } + if (sub) html_formula += ''; + return html_formula; + } + + /* + * User input processing: main algorithm + */ + function guess(inputstr) { + // *pseudo_numerics* + /*if (inputstr.includes('c/a ') || inputstr.includes('a/b ') || inputstr.includes('b/c ')) { // FIXME slashes in names if (inputstr.includes('c/a ')) inputstr = inputstr.replace('c/a ', 'c--a '); if (inputstr.includes('a/b ')) inputstr = inputstr.replace('a/b ', 'a--b '); if (inputstr.includes('b/c ')) inputstr = inputstr.replace('b/c ', 'b--c '); }*/ - if (inputstr.includes('\"')) return {'ignored': inputstr}; // Optimade guard - - const tokens = inputstr - .replace(new RegExp('\\+|\\!|\\?', 'g'), '') - .replace(new RegExp(',|/', 'g'), ' ') - .replace(new RegExp('<', 'g'), ' < ') - .replace(new RegExp('>', 'g'), ' > ') - .replace(new RegExp('=', 'g'), ' = ') - .split(/\s+/); - - const result = {}; - let n_terms = 0; - let n_toks = 1; - let queue = []; - const ignored = []; - //console.log(tokens); - // TODO: gracefully discard brackets - - tokens.forEach(function (input) { - let facet = false, - simple = false; - input = input.trim(); - - if ( - input.includes('<') || - input.includes('>') || - input.includes('=') || - is_numeric(input) - ) { - // numeric searches - if ( - input.indexOf('<') === 0 || - input.indexOf('>') === 0 || - input.indexOf('=') === 0 - ) { - // separated op sign - if (result.props) { - if (!result.numeric) result.numeric = []; - result.numeric.push([result.props, input.substr(0, 1)]); - delete result.props; - } else if (result.numeric && result.numeric[result.numeric.length - 1]) { - result.numeric[result.numeric.length - 1][1] = input.substr(0, 1); // TODO account <=, =<, >=, => - } else ignored.push(input); - } else if (is_numeric(input)) { - if (result.props) { - if (!result.numeric) result.numeric = []; - result.numeric.push([result.props, '=', parseFloat(input)]); - delete result.props; - } else if (result.numeric && result.numeric[result.numeric.length - 1]) { - result.numeric[result.numeric.length - 1][2] = parseFloat(input); // NB no commas! - if (!result.numeric[result.numeric.length - 1][1]) - result.numeric[result.numeric.length - 1][1] = '='; - } else ignored.push(input); - } - return; - } - - if (!queue.length || (input != 'at' && input != 'in')) { - // FIXME TODO special treatment, words vs. chemical symbols - simple = try_uniword_facet(input); - } - - if (simple) { - facet = simple[0]; - if (simple[1]) input = simple[1]; - if (queue.length && !queue[queue.length - 1].ready) - ignored.push(...queue.map((obj) => obj.input)); - queue = []; - //console.log(input + ": found simple facet " + simple[0]); - } else { - if (!stop_words.includes(input)) { - input = input - .replace(new RegExp('\\(', 'g'), '') - .replace(new RegExp('\\)', 'g'), '') - .replace(new RegExp('\\[', 'g'), '') - .replace(new RegExp('\\]', 'g'), ''); - - const candidate = try_multiword_facet(input, queue); - //console.log(candidate); - - if (candidate.combined) queue = []; - - if (candidate.ready) { - // term found either alone or with previous - facet = candidate.facet; - input = candidate.input; - queue = [candidate]; - } else if (candidate.anew) { - // token anew - if (queue.length && !queue[queue.length - 1].ready) - ignored.push(...queue.map((obj) => obj.input)); - queue = [candidate]; - } else if (!candidate) { - // token unknown - if (queue.length && !queue[queue.length - 1].ready) - ignored.push(...queue.map((obj) => obj.input)); - queue = []; - if (is_like_chem_formula(input)) { - facet = 'formulae'; - } - if (!facet) { - ignored.push(input); - } - } - } - } - - if (n_toks === tokens.length) { - // token at the end, terminating - if (queue.length && !queue[queue.length - 1].ready) - ignored.push(...queue.map((obj) => obj.input)); - queue = []; - } - - if (facet) { - if (facet === 'formulae') input = termify_formulae(input); - - if (result[facet]) { - // What to do with the found term of the same category? - //console.log('Compare: '+result[facet]+' vs. '+input); - - if (facet === 'formulae') { - ignored.push(input); - } else if (facet === 'elements') { - result[facet] += `-${input}`; - } else if (facet === 'classes') { - result[facet] += `, ${input}`; //escape(input); - } else if (facet === 'props') { - if (input.includes(result[facet])) { - //console.log('Smaller match '+result[facet]+' was thrown away'); - result[facet] = input; - } else { - ignored.push(input); - } - } - } else result[facet] = input; //escape(input); - - n_terms++; - } - n_toks++; - }); - - result.ignored = ignored; - return result; - } - - /* - * Chemical formulae parsing, courtesy of Nathan Leung, MIT license - */ - function tokenizeInitial(formula, tokens = []) { - if (formula.length === 0) { - return tokens; - } - - // Tokenize parentheses - if (formula[0] === '(' || formula[0] === '[') { - return tokenizeInitial(formula.substr(1), [ - ...tokens, { - type: 'parenthesis', - value: 'open', - }, - ]); - } - if (formula[0] === ')' || formula[0] === ']') { - return tokenizeAfterElement(formula.substr(1), [ - ...tokens, { - type: 'parenthesis', - value: 'close', - }, - ]); - } - - // Tokenize coefficient - let num = ''; - for (let i = 0; i < formula.length; i++) { - // If the char is a number - if (!isNaN(Number(formula[i]))) { - num = num + formula[i]; - } else { - // End loop if char is not a number - break; - } - } - // If a coefficient exists - if (num !== '') { - return tokenizeInitial(formula.substr(num.length), [ - ...tokens, { - type: 'coefficient', - value: Number(num), - }, - ]); - } - - // Tokenize element - // Check if current char is uppercase letter - const char = formula[0]; - const code = char.charCodeAt(0); - if (code >= 65 && code <= 90) { - // If next char is a lowercase letter - if (formula.length > 1) { - const nextChar = formula[1]; - const nextCode = nextChar.charCodeAt(0); - if (nextCode >= 97 && nextCode <= 122) { - return tokenizeAfterElement(formula.substr(2), [ - ...tokens, { - type: 'element', - value: char + nextChar, - }, - ]); - } - } - return tokenizeAfterElement(formula.substr(1), [ - ...tokens, { - type: 'element', - value: char, - }, - ]); - } - - // If it doesn't match anything - throw Error(`There was an error parsing formula. We were able to get to here:\n${JSON.stringify(tokens, null, 2)} + if (inputstr.includes('"')) return { ignored: inputstr }; // Optimade guard + + const tokens = inputstr + .replace(new RegExp('\\+|\\!|\\?', 'g'), '') + .replace(new RegExp(',|/', 'g'), ' ') + .replace(new RegExp('<', 'g'), ' < ') + .replace(new RegExp('>', 'g'), ' > ') + .replace(new RegExp('=', 'g'), ' = ') + .split(/\s+/); + + const result = {}; + let n_terms = 0; + let n_toks = 1; + let queue = []; + const ignored = []; + //console.log(tokens); + // TODO: gracefully discard brackets + + tokens.forEach(function (input) { + let facet = false, + simple = false; + input = input.trim(); + const linput = input.toLowerCase(); + + // Allow 'period' and 'group' to be queued so they can combine with a following number + if (linput === 'period' || linput === 'group') { + // push a not-ready candidate to queue and continue + queue.push({ input: linput, ready: 0 }); + n_toks++; + return; + } + + if ( + input.includes('<') || + input.includes('>') || + input.includes('=') || + is_numeric(input) + ) { + // numeric searches + // If this numeric token is actually the second part of 'period N' or 'group N', + // don't treat it as a numeric filter but allow multiword facet combining. + if ( + is_numeric(input) && + queue.length && + (queue[queue.length - 1].input === 'period' || + queue[queue.length - 1].input === 'group') + ) { + // fall-through to normal multiword processing + } else if ( + input.indexOf('<') === 0 || + input.indexOf('>') === 0 || + input.indexOf('=') === 0 + ) { + // separated op sign + if (result.props) { + if (!result.numeric) result.numeric = []; + result.numeric.push([result.props, input.substr(0, 1)]); + delete result.props; + } else if ( + result.numeric && + result.numeric[result.numeric.length - 1] + ) { + result.numeric[result.numeric.length - 1][1] = input.substr(0, 1); // TODO account <=, =<, >=, => + } else ignored.push(input); + } else if (is_numeric(input)) { + if (result.props) { + if (!result.numeric) result.numeric = []; + result.numeric.push([result.props, '=', parseFloat(input)]); + delete result.props; + } else if ( + result.numeric && + result.numeric[result.numeric.length - 1] + ) { + result.numeric[result.numeric.length - 1][2] = parseFloat(input); // NB no commas! + if (!result.numeric[result.numeric.length - 1][1]) + result.numeric[result.numeric.length - 1][1] = '='; + } else ignored.push(input); + } + return; + } + + if (!queue.length || (input != 'at' && input != 'in')) { + // FIXME TODO special treatment, words vs. chemical symbols + simple = try_uniword_facet(input); + } + + if (simple) { + facet = simple[0]; + if (simple[1]) input = simple[1]; + if (queue.length && !queue[queue.length - 1].ready) + ignored.push(...queue.map((obj) => obj.input)); + queue = []; + //console.log(input + ": found simple facet " + simple[0]); + } else { + if (!stop_words.includes(input)) { + input = input + .replace(new RegExp('\\(', 'g'), '') + .replace(new RegExp('\\)', 'g'), '') + .replace(new RegExp('\\[', 'g'), '') + .replace(new RegExp('\\]', 'g'), ''); + + const candidate = try_multiword_facet(input, queue); + //console.log(candidate); + + if (candidate.combined) queue = []; + + if (candidate.ready) { + // term found either alone or with previous + facet = candidate.facet; + input = candidate.input; + queue = [candidate]; + } else if (candidate.anew) { + // token anew + if (queue.length && !queue[queue.length - 1].ready) + ignored.push(...queue.map((obj) => obj.input)); + queue = [candidate]; + } else if (!candidate) { + // token unknown + if (queue.length && !queue[queue.length - 1].ready) + ignored.push(...queue.map((obj) => obj.input)); + queue = []; + if (is_like_chem_formula(input)) { + facet = 'formulae'; + } + if (!facet) { + ignored.push(input); + } + } + } + } + + if (n_toks === tokens.length) { + // token at the end, terminating + if (queue.length && !queue[queue.length - 1].ready) + ignored.push(...queue.map((obj) => obj.input)); + queue = []; + } + + if (facet) { + if (facet === 'formulae') input = termify_formulae(input); + + if (result[facet]) { + // What to do with the found term of the same category? + //console.log('Compare: '+result[facet]+' vs. '+input); + + if (facet === 'formulae') { + ignored.push(input); + } else if (facet === 'elements') { + result[facet] += `-${input}`; + } else if (facet === 'classes') { + result[facet] += `, ${input}`; //escape(input); + } else if (facet === 'props') { + if (input.includes(result[facet])) { + //console.log('Smaller match '+result[facet]+' was thrown away'); + result[facet] = input; + } else { + ignored.push(input); + } + } + } else result[facet] = input; //escape(input); + + n_terms++; + } + n_toks++; + }); + + result.ignored = ignored; + return result; + } + + /* + * Chemical formulae parsing, courtesy of Nathan Leung, MIT license + */ + function tokenizeInitial(formula, tokens = []) { + if (formula.length === 0) { + return tokens; + } + + // Tokenize parentheses + if (formula[0] === '(' || formula[0] === '[') { + return tokenizeInitial(formula.substr(1), [ + ...tokens, + { + type: 'parenthesis', + value: 'open', + }, + ]); + } + if (formula[0] === ')' || formula[0] === ']') { + return tokenizeAfterElement(formula.substr(1), [ + ...tokens, + { + type: 'parenthesis', + value: 'close', + }, + ]); + } + + // Tokenize coefficient + let num = ''; + for (let i = 0; i < formula.length; i++) { + // If the char is a number + if (!isNaN(Number(formula[i]))) { + num = num + formula[i]; + } else { + // End loop if char is not a number + break; + } + } + // If a coefficient exists + if (num !== '') { + return tokenizeInitial(formula.substr(num.length), [ + ...tokens, + { + type: 'coefficient', + value: Number(num), + }, + ]); + } + + // Tokenize element + // Check if current char is uppercase letter + const char = formula[0]; + const code = char.charCodeAt(0); + if (code >= 65 && code <= 90) { + // If next char is a lowercase letter + if (formula.length > 1) { + const nextChar = formula[1]; + const nextCode = nextChar.charCodeAt(0); + if (nextCode >= 97 && nextCode <= 122) { + return tokenizeAfterElement(formula.substr(2), [ + ...tokens, + { + type: 'element', + value: char + nextChar, + }, + ]); + } + } + return tokenizeAfterElement(formula.substr(1), [ + ...tokens, + { + type: 'element', + value: char, + }, + ]); + } + + // If it doesn't match anything + throw Error(`There was an error parsing formula. We were able to get to here:\n${JSON.stringify( + tokens, + null, + 2 + )} \n\nRemaining formula: ${formula}`); - } - - /* - * Chemical formulae parsing, courtesy of Nathan Leung, MIT license - */ - function tokenizeAfterElement(formula, tokens) { - if (formula.length === 0) { - return tokenizeInitial(formula, tokens); - } - - // Tokenize subscript - // This is repeated from tokenizeInitial, perhaps - // take it out - let num = ''; - for (let i = 0; i < formula.length; i++) { - // If the char is a number - if (!isNaN(Number(formula[i]))) { - num = num + formula[i]; - } else { - // End loop if char is not a number - break; - } - } - // If a subscript exists - if (num !== '') { - return tokenizeInitial(formula.substr(num.length), [ - ...tokens, { - type: 'subscript', - value: Number(num), - }, - ]); - } - - // If it's not anything, then pass back to tokenizeInitial - return tokenizeInitial(formula, tokens); - } - - /* - * Chemical formulae parsing, courtesy of Nathan Leung, MIT license - */ - function countElements(tokens, elements = {}) { - if (tokens.length === 0) { - return elements; - } - // Create keys for each element present in the tokens array - if (Object.keys(elements).length === 0) { - for (let i = 0; i < tokens.length; i++) { - if (tokens[i].type === 'element') { - if (typeof elements[tokens[i].value] === 'undefined') { - elements[tokens[i].value] = 0; - } - } - } - } - - for (let i = 0; i < tokens.length; i++) { - if (tokens[i].type === 'coefficient') { - return countInGroup(tokens.slice(1), elements, tokens[i].value); - } else if (tokens[i].type === 'element') { - return countInGroup(tokens, elements); - } else if (tokens[i].type === 'parenthesis') { - if (tokens[i].value === 'open') { - for (let j = i; j < tokens.length; j++) { - if (tokens[j].type === 'parenthesis' && tokens[j].value === 'close') { - // There will always be a subscript after a parenthetical group - if ( - tokens.length > (j + 1) && - tokens[j].type === 'parenthesis' && - tokens[j].value === 'close' - ) { - if (tokens[j + 1].type === 'subscript') { - return countInGroup(tokens.slice(1), elements, tokens[j + 1].value); - } - } - //console.error(JSON.stringify(tokens, null, 2)); - //console.error(JSON.stringify(elements, null, 2)) - throw Error('Parenthetical group must have subscript'); - } - } - return countInGroup(tokens.slice(1), elements); - } else { - // Skip closing parenthesis and subscript - return countInGroup(tokens.slice(2), elements); - } - } - } - - // If none of the above cases are covered then throw an error - throw Error(`There was an error:\n\n${JSON.stringify(tokens, null, 2)} + } + + /* + * Chemical formulae parsing, courtesy of Nathan Leung, MIT license + */ + function tokenizeAfterElement(formula, tokens) { + if (formula.length === 0) { + return tokenizeInitial(formula, tokens); + } + + // Tokenize subscript + // This is repeated from tokenizeInitial, perhaps + // take it out + let num = ''; + for (let i = 0; i < formula.length; i++) { + // If the char is a number + if (!isNaN(Number(formula[i]))) { + num = num + formula[i]; + } else { + // End loop if char is not a number + break; + } + } + // If a subscript exists + if (num !== '') { + return tokenizeInitial(formula.substr(num.length), [ + ...tokens, + { + type: 'subscript', + value: Number(num), + }, + ]); + } + + // If it's not anything, then pass back to tokenizeInitial + return tokenizeInitial(formula, tokens); + } + + /* + * Chemical formulae parsing, courtesy of Nathan Leung, MIT license + */ + function countElements(tokens, elements = {}) { + if (tokens.length === 0) { + return elements; + } + // Create keys for each element present in the tokens array + if (Object.keys(elements).length === 0) { + for (let i = 0; i < tokens.length; i++) { + if (tokens[i].type === 'element') { + if (typeof elements[tokens[i].value] === 'undefined') { + elements[tokens[i].value] = 0; + } + } + } + } + + for (let i = 0; i < tokens.length; i++) { + if (tokens[i].type === 'coefficient') { + return countInGroup(tokens.slice(1), elements, tokens[i].value); + } else if (tokens[i].type === 'element') { + return countInGroup(tokens, elements); + } else if (tokens[i].type === 'parenthesis') { + if (tokens[i].value === 'open') { + for (let j = i; j < tokens.length; j++) { + if ( + tokens[j].type === 'parenthesis' && + tokens[j].value === 'close' + ) { + // There will always be a subscript after a parenthetical group + if ( + tokens.length > j + 1 && + tokens[j].type === 'parenthesis' && + tokens[j].value === 'close' + ) { + if (tokens[j + 1].type === 'subscript') { + return countInGroup( + tokens.slice(1), + elements, + tokens[j + 1].value + ); + } + } + //console.error(JSON.stringify(tokens, null, 2)); + //console.error(JSON.stringify(elements, null, 2)) + throw Error('Parenthetical group must have subscript'); + } + } + return countInGroup(tokens.slice(1), elements); + } else { + // Skip closing parenthesis and subscript + return countInGroup(tokens.slice(2), elements); + } + } + } + + // If none of the above cases are covered then throw an error + throw Error(`There was an error:\n\n${JSON.stringify(tokens, null, 2)} \n\n${JSON.stringify(elements, null, 2)}`); - } - - /* - * Chemical formulae parsing, courtesy of Nathan Leung, MIT license - */ - function countInGroup(tokens, elements, coefficient = 1) { - if (tokens.length === 0) { - return countElements(tokens, elements); - } - - if (tokens[0].type !== 'element') { - // If this happens this is likely a mistake, pass back to countElements - return countElements(tokens, elements); - } - - let i = 0; - while (i < tokens.length) { - if (tokens[i].type === 'element') { - if (tokens.length > (i + 1) && tokens[i + 1].type === 'subscript') { - elements[tokens[i].value] += (tokens[i + 1].value * coefficient); - i += 2; - continue; - } - elements[tokens[i].value] += coefficient; - i++; - continue; - } - break; - } - return countElements(tokens.slice(i), elements); - } - - /* - * Sorting the formulae alphabetically for Optimade - */ - function sort_formula(text) { - let parsed = false; - try { - parsed = countElements(tokenizeInitial(text)); - } catch (err) { - //console.error(err); - return text; - } - let sorted = '', - els = Object.keys(parsed); - els.sort(); - els.forEach(function (el) { - sorted += el + (parsed[el] === 1 ? '' : parsed[el]); - }); - return sorted; - } - - /* - * Convert MPDS search query object notation into the Optimade filter - */ - function to_optimade(parsed) { - const filter = []; - - ['formulae', 'elements', 'props', 'classes'].forEach(function (categ) { - if (!parsed[categ]) return; - else if (categ === 'formulae') { - if (is_formula_anonymous(parsed[categ])) - filter.push(`chemical_formula_anonymous="${parsed[categ]}"`); - else filter.push(`chemical_formula_reduced="${sort_formula(parsed[categ])}"`); - } else if (categ === 'elements') { - filter.push(`elements HAS ALL "${parsed[categ].split('-').join('","')}"`); - } else if (categ === 'props') { - filter.push(`_mpds_${parsed[categ].replaceAll(' ', '_')} IS KNOWN`); - } else if (categ === 'classes') { - parsed[categ].split(', ').forEach(function (item) { - const arity = arity_keys.indexOf(item); - if (arity > 0) filter.push(`nelements=${arity}`); - }); - } - }); - - return filter.join(' AND '); - } - - /* - * API - */ - return { - guess, - to_optimade, - - is_formula_anonymous, - sort_formula, - parse_aeatoms, - termify_formulae, - is_numeric, - - arity_keys, - periodic_elements, - periodic_elements_cased, - periodic_element_names - }; + } + + /* + * Chemical formulae parsing, courtesy of Nathan Leung, MIT license + */ + function countInGroup(tokens, elements, coefficient = 1) { + if (tokens.length === 0) { + return countElements(tokens, elements); + } + + if (tokens[0].type !== 'element') { + // If this happens this is likely a mistake, pass back to countElements + return countElements(tokens, elements); + } + + let i = 0; + while (i < tokens.length) { + if (tokens[i].type === 'element') { + if (tokens.length > i + 1 && tokens[i + 1].type === 'subscript') { + elements[tokens[i].value] += tokens[i + 1].value * coefficient; + i += 2; + continue; + } + elements[tokens[i].value] += coefficient; + i++; + continue; + } + break; + } + return countElements(tokens.slice(i), elements); + } + + /* + * Sorting the formulae alphabetically for Optimade + */ + function sort_formula(text) { + let parsed = false; + try { + parsed = countElements(tokenizeInitial(text)); + } catch (err) { + //console.error(err); + return text; + } + let sorted = '', + els = Object.keys(parsed); + els.sort(); + els.forEach(function (el) { + sorted += el + (parsed[el] === 1 ? '' : parsed[el]); + }); + return sorted; + } + + /* + * Convert MPDS search query object notation into the Optimade filter + */ + function to_optimade(parsed) { + const filter = []; + + ['formulae', 'elements', 'props', 'classes'].forEach(function (categ) { + if (!parsed[categ]) return; + else if (categ === 'formulae') { + if (is_formula_anonymous(parsed[categ])) + filter.push(`chemical_formula_anonymous="${parsed[categ]}"`); + else + filter.push( + `chemical_formula_reduced="${sort_formula(parsed[categ])}"` + ); + } else if (categ === 'elements') { + filter.push( + `elements HAS ALL "${parsed[categ].split('-').join('","')}"` + ); + } else if (categ === 'props') { + filter.push(`_mpds_${parsed[categ].replaceAll(' ', '_')} IS KNOWN`); + } else if (categ === 'classes') { + parsed[categ].split(', ').forEach(function (item) { + const arity = arity_keys.indexOf(item); + if (arity > 0) { + filter.push(`nelements=${arity}`); + } else if (mpds_classes.includes(item)) { + filter.push(`_mpds_classes HAS ALL "${item}"`); + } + }); + } + }); + + if (parsed.numeric && Array.isArray(parsed.numeric)) { + parsed.numeric.forEach(function (numfilt) { + // numfilt: [property, operator, value] + if ( + numfilt.length === 3 && + numfilt[0] && + numfilt[1] && + typeof numfilt[2] !== 'undefined' + ) { + // Remove _mpds_ prefix if present + let prop = numfilt[0].replace(/^_mpds_/, '').replace(/ /g, '_'); + filter.push(`${prop}${numfilt[1]}${numfilt[2]}`); + } + }); + } + + return filter.join(' AND '); + } + + /* + * API + */ + return { + guess, + to_optimade, + + is_formula_anonymous, + sort_formula, + parse_aeatoms, + termify_formulae, + is_numeric, + + arity_keys, + periodic_elements, + periodic_elements_cased, + periodic_element_names, + }; }; if (typeof module !== 'undefined' && module.exports) { - module.exports = OptimadeNLP; -} else if (typeof require === 'function' && typeof require.specified === 'function') { - define(function () { - return OptimadeNLP; - }); + module.exports = OptimadeNLP; +} else if ( + typeof require === 'function' && + typeof require.specified === 'function' +) { + define(function () { + return OptimadeNLP; + }); } else if (window !== undefined) { - window.OptimadeNLP = OptimadeNLP + window.OptimadeNLP = OptimadeNLP; } diff --git a/test_nlp.json b/test_nlp.json index 12844ac..bd61327 100755 --- a/test_nlp.json +++ b/test_nlp.json @@ -1,216 +1,330 @@ [ - ["tio2", { "formulae": "tio2" }], - ["al2o3", { "formulae": "al2o3" }], - ["band gap, chlorides", { "props": "band gap", "classes": "chloride" }], - ["ti o", { "elements": "Ti-O" }], - ["tio2, band gap", { "formulae": "tio2", "props": "band gap" }], - ["organic pb i", { "classes": "organic", "elements": "Pb-I" }], - ["organic pb-i perovskite", { "elements": "Pb-I", "classes": "organic, perovskite" }], - ["ternary oxide", { "classes": "ternary, oxide" }], - ["zeolite ite", { "classes": "zeolite" }], - ["CuLaMnSeO", { "formulae": "CuLaMnSeO" }], - ["AgGaTeSe", { "formulae": "AgGaTeSe" }], - ["MnOCaSeRu", { "formulae": "MnOCaSeRu" }], - ["LaZnOAsAm", { "formulae": "LaZnOAsAm" }], - ["MnCaOSeRu", { "formulae": "MnCaOSeRu" }], - ["MnCaSeBRu", { "formulae": "MnCaSeBRu" }], - ["MnCaSeRu", { "formulae": "MnCaSeRu" }], - ["BCNOAc", { "formulae": "BCNOAc" }], - ["LaZnOAs", { "formulae": "LaZnOAs" }], - ["LaZnAsO", { "formulae": "LaZnAsO" }], - ["LaOZnAs", { "formulae": "LaOZnAs" }], - ["OLaZnAs", { "formulae": "OLaZnAs" }], - ["OLaZnY", { "formulae": "OLaZnY" }], - ["OLaZnYB", { "formulae": "OLaZnYB" }], - ["OLaZnYBr", { "formulae": "OLaZnYBr" }], - ["OLaSZnAs", { "formulae": "OLaSZnAs" }], - ["OLaOZnAs", { "formulae": "OLaOZnAs" }], - ["OLaZnPAs", { "formulae": "OLaZnPAs" }], - ["OLaKZnPAs", { "formulae": "OLaKZnPAs" }], - ["SYPMnBr", { "formulae": "SYPMnBr" }], - ["SYMnPBr", { "formulae": "SYMnPBr" }], - ["RbMnTaO", { "formulae": "RbMnTaO" }], - ["RoMnTaO", {}], - ["CPZnOY", { "formulae": "CPZnOY" }], - ["CPOZnY", { "formulae": "CPOZnY" }], - ["BaLaMnRuO6", { "formulae": "BaLaMnRuO6" }], - ["phase diagrams", { "props": "phase diagram" }], - [ - "thermodynamics superconductor fe", - { "props": "thermodynamics", "classes": "superconductor", "elements": "Fe" } - ], - ["cub", { "lattices": "cubic" }], - [ - "tetragonal srtio3 ternary", - { "lattices": "tetragonal", "formulae": "srtio3", "classes": "ternary" } - ], - [ - "SrTiO₃ tet phonons", - { "lattices": "tetragonal", "formulae": "SrTiO3", "props": "phonons" } - ], - ["O3Al2 elastic properties", { "formulae": "O3Al2" }], - ["optical properties, LiKSO4", { "props": "optical properties", "formulae": "LiKSO4" }], - ["band gap ZnO hex", { "props": "band gap", "lattices": "hexagonal", "formulae": "ZnO" }], - ["geo", {}], - ["GeO", { "formulae": "GeO" }], - ["beo", {}], - ["BeO", { "formulae": "BeO" }], - ["GdNiIn conductivity", { "formulae": "GdNiIn", "props": "conductivity" }], - ["indium binary oxide metal", { "elements": "In", "classes": "binary, oxide, metal" }], - ["Pd hydride lattice", { "elements": "Pd", "classes": "hydride", "props": "lattice" }], - ["Mn halogens Be", { "elements": "Mn-Be", "classes": "halogen" }], - [ - "carbide semiconductor electronic properties", - { "classes": "carbide, semiconductor", "props": "electronic properties" } - ], - [ - "hexagonal carbide semiconductor electronic properties", - { - "lattices": "hexagonal", - "classes": "carbide, semiconductor", - "props": "electronic properties" - } - ], - [ - "rare earth chalcogens organics magnetism", - { "classes": "rare earth, chalcogen, organic", "props": "magnetism" } - ], - ["Te metals transitional Sn", { "elements": "Te-Sn", "classes": "metal, transitional" }], - [ - "radioactive nonmetals conductivity", - { "classes": "radioactive, nonmetal", "props": "conductivity" } - ], - ["noble gases superconductivity", { "classes": "noble gas", "props": "superconductivity" }], - [ - "W-Mo ternary, phase diagram", - { "elements": "W-Mo", "classes": "ternary", "props": "phase diagram" } - ], - [ - "quaternary perovskites, thermodynamics", - { "classes": "quaternary, perovskite", "props": "thermodynamics" } - ], - ["lanthanoid element", { "classes": "lanthanoid, unary" }], - [ - "lanthanoid transitional electronic properties", - { "classes": "lanthanoid, transitional", "props": "electronic properties" } - ], - [ - "lattice of intermetallic binary cubic", - { "props": "lattice", "classes": "intermetallic, binary", "lattices": "cubic" } - ], - ["oxygen", { "elements": "O" }], - [ - "Y oxide lattice ternary", - { "elements": "Y", "classes": "oxide, ternary", "props": "lattice" } - ], - ["phases for ternary ferromagnet", { "props": "phases", "classes": "ternary, ferromagnet" }], - [ - "superconductivity superconductors C", - { "props": "superconductivity", "classes": "superconductor", "elements": "C" } - ], - [ - "magnetism, ferromagnet, chalcogen, monocl", - { "props": "magnetism", "classes": "ferromagnet, chalcogen", "lattices": "monoclinic" } - ], - ["Rg", { "elements": "Rg" }], - ["ErSi", { "formulae": "ErSi" }], - ["ErSI", { "formulae": "ErSI" }], - ["Ca3(PO4)2", { "formulae": "Ca3(PO4)2" }], - ["Ca3[PO4]2", { "formulae": "Ca3[PO4]2" }], - ["conductors", { "classes": "conductor" }], - ["chromium, fe, tricl", { "elements": "Cr-Fe", "lattices": "triclinic" }], - ["astatine, cell", { "elements": "At" }], - ["Gruneisen coefficient", { "props": "gruneisen coefficients" }], - ["Seebeck coefficient", { "props": "seebeck coefficient" }], - ["figure of merit", { "props": "figure of merit" }], - ["conductivity", { "props": "conductivity" }], - [ - "crystalline structure of binaries", - { "props": "crystalline structure", "classes": "binary" } - ], - ["crystal cell for binary compound", { "props": "crystal cell", "classes": "binary" }], - ["valence", { "props": "valence" }], - ["conductivity", { "props": "conductivity" }], - ["La magnetic phase diagram", { "elements": "La", "props": "magnetic phase diagram" }], - ["info on phase diagrams plots", { "props": "phase diagram plots" }], - ["optical conductivity", { "props": "optical conductivity" }], - ["cell and atoms", { "classes": "cell and atoms" }], - ["conductor superconductor", { "classes": "conductor, superconductor" }], - ["metal nonmetal", { "classes": "metal, nonmetal" }], - ["metal or nonmetal character", { "props": "metal or nonmetal character" }], - ["electrical resistance values", { "props": "electrical properties" }], - ["temperature derivative by pressure", { "props": "pressure" }], - ["temperature derivative by composition", {}], - ["optical phonon frequency", { "props": "optical properties" }], - ["enthalpy conductivity", { "props": "values of enthalpy" }], - ["decomposis of the magneto", {}], - ["TiO2 SrTiO3", { "formulae": "TiO2" }], - ["average number of 4f electrons", {}], - ["225aaa!", {}], - ["he-he-he", { "elements": "He-He-He" }], - [ - "electronic contribution to thermal conductivity", - { "props": "electronic contribution to thermal conductivity" } - ], - [ - "electronic contribution to superconducting heat capacity", - { "props": "electronic contribution to superconducting heat capacity" } - ], - ["residual resistivity", { "props": "residual resistivity ratio" }], - ["optical absorption no", { "props": "optical absorption coefficient", "elements": "No" }], - ["energy of optical phonon", { "props": "energy of optical phonon" }], - ["luminescence", { "props": "values of luminescence" }], - ["values of luminescence", { "props": "values of luminescence" }], - ["magnetization", { "props": "magnetization" }], - ["values of magnetization", { "props": "values of magnetization" }], - ["energy of longitudinal optical phonon", { "props": "energy of longitudinal optical phonon" }], - [ - "permittivity (dielectric constant) of perovskites ", - { "props": "permittivity", "classes": "perovskite" } - ], - ["density > 1", { "numeric": [["density", ">", 1]], "phased": true }], - ["InFO", { "formulae": "InFO" }], - ["info", {}], - ["Goodbye.", {}], - ["iiii", {}], - ["...", {}], - ["What'd I do?", { "elements": "I" }], - ["OK, let's go", { "formulae": "OK" }], - ["Ginny Danburry's here. Look for her", {}], - ["I think u make me laughing.", { "elements": "I-U" }], - ["physical properties", { "props": "physical properties" }], - ["physical property", { "props": "physical properties" }], - ["electric property", { "props": "electric properties" }], - ["ab initio", {}], - ["ab initio calculations,", { "classes": "ab initio calculations" }], - ["ab initio literature,", { "classes": "ab initio literature" }], - [ - "ab initio literature, ab initio calculations,", - { "classes": "ab initio literature, ab initio calculations" } - ], - ["ab initio calculations P K", { "elements": "P-K", "classes": "ab initio calculations" }], - ["ab initio literature P K", { "elements": "P-K", "classes": "ab initio literature" }], - [ - "ab initio, ab initio calculations, P K", - { "elements": "P-K", "classes": "ab initio calculations" } - ], - [ - "ab initio literature, ab initio calculations, P K", - { "elements": "P-K", "classes": "ab initio literature, ab initio calculations" } - ], - ["A1B2C3", { "formulae": "A1B2C3" }], - ["AB", { "formulae": "AB" }], - ["ab datum", {}], - ["AB ab initio calculations", { "formulae": "AB", "classes": "ab initio calculations" }], - ["ABC7D9", { "formulae": "ABC7D9" }], - ["ABCD", { "formulae": "ABCD" }], - ["ABCDE", {}], - ["filter=author=\"Sąžininga Žąsis\"", {}], - ["(elements HAS \"Ac\" AND nelements=1) OR (elements HAS \"Ac\" AND nsites=1)", {}], - ["nelements=1", {}], - ["elements HAS \"Ti\"", {}], - ["elements HAS ALL \"C\",\"N\",\"O\",\"H\"", {}], - ["elements HAS \"Ti\" AND nelements>3", {}], - ["chemical_formula_reduced=\"Li7Sn2\"", {}], - ["chemical_formula_anonymous=\"ABC\"", {}] + ["tio2", { "formulae": "tio2" }], + ["al2o3", { "formulae": "al2o3" }], + ["band gap, chlorides", { "props": "band gap", "classes": "chloride" }], + ["ti o", { "elements": "Ti-O" }], + ["tio2, band gap", { "formulae": "tio2", "props": "band gap" }], + ["organic pb i", { "classes": "organic", "elements": "Pb-I" }], + [ + "organic pb-i perovskite", + { "elements": "Pb-I", "classes": "organic, perovskite" } + ], + ["ternary oxide", { "classes": "ternary, oxide" }], + ["zeolite ite", { "classes": "zeolite" }], + ["CuLaMnSeO", { "formulae": "CuLaMnSeO" }], + ["AgGaTeSe", { "formulae": "AgGaTeSe" }], + ["MnOCaSeRu", { "formulae": "MnOCaSeRu" }], + ["LaZnOAsAm", { "formulae": "LaZnOAsAm" }], + ["MnCaOSeRu", { "formulae": "MnCaOSeRu" }], + ["MnCaSeBRu", { "formulae": "MnCaSeBRu" }], + ["MnCaSeRu", { "formulae": "MnCaSeRu" }], + ["BCNOAc", { "formulae": "BCNOAc" }], + ["LaZnOAs", { "formulae": "LaZnOAs" }], + ["LaZnAsO", { "formulae": "LaZnAsO" }], + ["LaOZnAs", { "formulae": "LaOZnAs" }], + ["OLaZnAs", { "formulae": "OLaZnAs" }], + ["OLaZnY", { "formulae": "OLaZnY" }], + ["OLaZnYB", { "formulae": "OLaZnYB" }], + ["OLaZnYBr", { "formulae": "OLaZnYBr" }], + ["OLaSZnAs", { "formulae": "OLaSZnAs" }], + ["OLaOZnAs", { "formulae": "OLaOZnAs" }], + ["OLaZnPAs", { "formulae": "OLaZnPAs" }], + ["OLaKZnPAs", { "formulae": "OLaKZnPAs" }], + ["SYPMnBr", { "formulae": "SYPMnBr" }], + ["SYMnPBr", { "formulae": "SYMnPBr" }], + ["RbMnTaO", { "formulae": "RbMnTaO" }], + ["RoMnTaO", {}], + ["CPZnOY", { "formulae": "CPZnOY" }], + ["CPOZnY", { "formulae": "CPOZnY" }], + ["BaLaMnRuO6", { "formulae": "BaLaMnRuO6" }], + ["phase diagrams", { "props": "phase diagram" }], + [ + "thermodynamics superconductor fe", + { "props": "thermodynamics", "classes": "superconductor", "elements": "Fe" } + ], + ["cub", { "lattices": "cubic" }], + [ + "tetragonal srtio3 ternary", + { "lattices": "tetragonal", "formulae": "srtio3", "classes": "ternary" } + ], + [ + "SrTiO₃ tet phonons", + { "lattices": "tetragonal", "formulae": "SrTiO3", "props": "phonons" } + ], + ["O3Al2 elastic properties", { "formulae": "O3Al2" }], + [ + "optical properties, LiKSO4", + { "props": "optical properties", "formulae": "LiKSO4" } + ], + [ + "band gap ZnO hex", + { "props": "band gap", "lattices": "hexagonal", "formulae": "ZnO" } + ], + ["geo", {}], + ["GeO", { "formulae": "GeO" }], + ["beo", {}], + ["BeO", { "formulae": "BeO" }], + ["GdNiIn conductivity", { "formulae": "GdNiIn", "props": "conductivity" }], + [ + "indium binary oxide metal", + { "elements": "In", "classes": "binary, oxide, metal" } + ], + [ + "Pd hydride lattice", + { "elements": "Pd", "classes": "hydride", "props": "lattice" } + ], + ["Mn halogens Be", { "elements": "Mn-Be", "classes": "halogen" }], + [ + "carbide semiconductor electronic properties", + { "classes": "carbide, semiconductor", "props": "electronic properties" } + ], + [ + "hexagonal carbide semiconductor electronic properties", + { + "lattices": "hexagonal", + "classes": "carbide, semiconductor", + "props": "electronic properties" + } + ], + [ + "rare earth chalcogens organics magnetism", + { "classes": "rare earth, chalcogen, organic", "props": "magnetism" } + ], + [ + "Te metals transitional Sn", + { "elements": "Te-Sn", "classes": "metal, transitional" } + ], + [ + "radioactive nonmetals conductivity", + { "classes": "radioactive, nonmetal", "props": "conductivity" } + ], + [ + "noble gases superconductivity", + { "classes": "noble gas", "props": "superconductivity" } + ], + [ + "W-Mo ternary, phase diagram", + { "elements": "W-Mo", "classes": "ternary", "props": "phase diagram" } + ], + [ + "quaternary perovskites, thermodynamics", + { "classes": "quaternary, perovskite", "props": "thermodynamics" } + ], + ["lanthanoid element", { "classes": "lanthanoid, unary" }], + [ + "lanthanoid transitional electronic properties", + { "classes": "lanthanoid, transitional", "props": "electronic properties" } + ], + [ + "lattice of intermetallic binary cubic", + { + "props": "lattice", + "classes": "intermetallic, binary", + "lattices": "cubic" + } + ], + ["oxygen", { "elements": "O" }], + [ + "Y oxide lattice ternary", + { "elements": "Y", "classes": "oxide, ternary", "props": "lattice" } + ], + [ + "phases for ternary ferromagnet", + { "props": "phases", "classes": "ternary, ferromagnet" } + ], + [ + "superconductivity superconductors C", + { + "props": "superconductivity", + "classes": "superconductor", + "elements": "C" + } + ], + [ + "magnetism, ferromagnet, chalcogen, monocl", + { + "props": "magnetism", + "classes": "ferromagnet, chalcogen", + "lattices": "monoclinic" + } + ], + ["Rg", { "elements": "Rg" }], + ["ErSi", { "formulae": "ErSi" }], + ["ErSI", { "formulae": "ErSI" }], + ["Ca3(PO4)2", { "formulae": "Ca3(PO4)2" }], + ["Ca3[PO4]2", { "formulae": "Ca3[PO4]2" }], + ["conductors", { "classes": "conductor" }], + ["chromium, fe, tricl", { "elements": "Cr-Fe", "lattices": "triclinic" }], + ["astatine, cell", { "elements": "At" }], + ["Gruneisen coefficient", { "props": "gruneisen coefficients" }], + ["Seebeck coefficient", { "props": "seebeck coefficient" }], + ["figure of merit", { "props": "figure of merit" }], + ["conductivity", { "props": "conductivity" }], + [ + "crystalline structure of binaries", + { "props": "crystalline structure", "classes": "binary" } + ], + [ + "crystal cell for binary compound", + { "props": "crystal cell", "classes": "binary" } + ], + ["valence", { "props": "valence" }], + ["conductivity", { "props": "conductivity" }], + [ + "La magnetic phase diagram", + { "elements": "La", "props": "magnetic phase diagram" } + ], + ["info on phase diagrams plots", { "props": "phase diagram plots" }], + ["optical conductivity", { "props": "optical conductivity" }], + ["cell and atoms", { "classes": "cell and atoms" }], + ["conductor superconductor", { "classes": "conductor, superconductor" }], + ["metal nonmetal", { "classes": "metal, nonmetal" }], + ["metal or nonmetal character", { "props": "metal or nonmetal character" }], + ["electrical resistance values", { "props": "electrical properties" }], + ["temperature derivative by pressure", { "props": "pressure" }], + ["temperature derivative by composition", {}], + ["optical phonon frequency", { "props": "optical properties" }], + ["enthalpy conductivity", { "props": "values of enthalpy" }], + ["decomposis of the magneto", {}], + ["TiO2 SrTiO3", { "formulae": "TiO2" }], + ["average number of 4f electrons", {}], + ["225aaa!", {}], + ["he-he-he", { "elements": "He-He-He" }], + [ + "electronic contribution to thermal conductivity", + { "props": "electronic contribution to thermal conductivity" } + ], + [ + "electronic contribution to superconducting heat capacity", + { "props": "electronic contribution to superconducting heat capacity" } + ], + ["residual resistivity", { "props": "residual resistivity ratio" }], + [ + "optical absorption no", + { "props": "optical absorption coefficient", "elements": "No" } + ], + ["energy of optical phonon", { "props": "energy of optical phonon" }], + ["luminescence", { "props": "values of luminescence" }], + ["values of luminescence", { "props": "values of luminescence" }], + ["magnetization", { "props": "magnetization" }], + ["values of magnetization", { "props": "values of magnetization" }], + [ + "energy of longitudinal optical phonon", + { "props": "energy of longitudinal optical phonon" } + ], + [ + "permittivity (dielectric constant) of perovskites ", + { "props": "permittivity", "classes": "perovskite" } + ], + ["density > 1", { "numeric": [["density", ">", 1]], "phased": true }], + ["InFO", { "formulae": "InFO" }], + ["info", {}], + ["Goodbye.", {}], + ["iiii", {}], + ["...", {}], + ["What'd I do?", { "elements": "I" }], + ["OK, let's go", { "formulae": "OK" }], + ["Ginny Danburry's here. Look for her", {}], + ["I think u make me laughing.", { "elements": "I-U" }], + ["physical properties", { "props": "physical properties" }], + ["physical property", { "props": "physical properties" }], + ["electric property", { "props": "electric properties" }], + ["ab initio", {}], + ["ab initio calculations,", { "classes": "ab initio calculations" }], + ["ab initio literature,", { "classes": "ab initio literature" }], + [ + "ab initio literature, ab initio calculations,", + { "classes": "ab initio literature, ab initio calculations" } + ], + [ + "ab initio calculations P K", + { "elements": "P-K", "classes": "ab initio calculations" } + ], + [ + "ab initio literature P K", + { "elements": "P-K", "classes": "ab initio literature" } + ], + [ + "ab initio, ab initio calculations, P K", + { "elements": "P-K", "classes": "ab initio calculations" } + ], + [ + "ab initio literature, ab initio calculations, P K", + { + "elements": "P-K", + "classes": "ab initio literature, ab initio calculations" + } + ], + ["A1B2C3", { "formulae": "A1B2C3" }], + ["AB", { "formulae": "AB" }], + ["ab datum", {}], + [ + "AB ab initio calculations", + { "formulae": "AB", "classes": "ab initio calculations" } + ], + ["ABC7D9", { "formulae": "ABC7D9" }], + ["ABCD", { "formulae": "ABCD" }], + ["ABCDE", {}], + ["filter=author=\"Sąžininga Žąsis\"", {}], + [ + "(elements HAS \"Ac\" AND nelements=1) OR (elements HAS \"Ac\" AND nsites=1)", + {} + ], + ["nelements=1", {}], + ["elements HAS \"Ti\"", {}], + ["elements HAS ALL \"C\",\"N\",\"O\",\"H\"", {}], + ["elements HAS \"Ti\" AND nelements>3", {}], + ["chemical_formula_reduced=\"Li7Sn2\"", {}], + ["chemical_formula_anonymous=\"ABC\"", {}], + ["elements HAS \"C\" AND elements HAS \"N\" AND elements HAS \"O\"", {}], + ["elements HAS ANY \"C\",\"N\",\"O\"", {}], + ["elements HAS \"C\" OR elements HAS \"N\" OR elements HAS \"O\"", {}], + ["elements HAS \"C\" AND (elements HAS \"N\" OR elements HAS \"O\")", {}], + ["elements HAS ALL \"C\",\"N\",\"O\" AND nelements=3", {}], + ["elements HAS ALL \"C\",\"N\",\"O\" AND nelements=4", {}], + ["elements HAS ANY \"C\",\"N\",\"O\" AND nelements=3", {}], + ["elements HAS ANY \"C\",\"N\",\"O\" AND nelements=4", {}], + ["nelements>3 AND elements HAS ANY \"C\",\"N\",\"O\"", {}], + ["nelements>4 AND elements HAS ANY \"C\",\"N\",\"O\"", {}], + ["nelements>3 AND elements HAS ALL \"C\",\"N\",\"O\"", {}], + ["nelements>4 AND elements HAS ALL \"C\",\"N\",\"O\"", {}], + ["band gap>1.5", { "numeric": [["band gap", ">", 1.5]], "phased": true }], + ["band gap<1.5", { "numeric": [["band gap", "<", 1.5]], "phased": true }], + ["band gap>=1.5", { "numeric": [["band gap", ">=", 1.5]], "phased": true }], + ["band gap<=1.5", { "numeric": [["band gap", "<=", 1.5]], "phased": true }], + ["band gap=1.5", { "numeric": [["band gap", "=", 1.5]], "phased": true }], + ["band gap!=1.5", { "numeric": [["band gap", "!=", 1.5]], "phased": true }], + [ + "band gap>1.5 AND band gap<2.0", + { + "numeric": [ + ["band gap", ">", 1.5], + ["band gap", "<", 2.0] + ], + "phased": true + } + ], + ["chalcogens", { "elements": "O-S-Se-Te-Po" }], + ["period 2", { "elements": "Li-Be-B-C-N-O-F-Ne" }], + ["group 11", { "elements": "Cu-Ag-Au-Rg" }], + ["tetrels", { "elements": "C-Si-Ge-Sn-Pb-Fl" }], + ["tetrel", { "elements": "C-Si-Ge-Sn-Pb-Fl" }], + ["all tetrels", { "elements": "C-Si-Ge-Sn-Pb-Fl" }], + + ["all chalcogens", { "elements": "O-S-Se-Te-Po" }], + ["Chalcogens", { "elements": "O-S-Se-Te-Po" }], + ["chalcogen", { "elements": "O-S-Se-Te-Po" }], + ["CHALCOGENS", { "elements": "O-S-Se-Te-Po" }], + + ["period 2 elements", { "elements": "Li-Be-B-C-N-O-F-Ne" }], + ["Period 2", { "elements": "Li-Be-B-C-N-O-F-Ne" }], + ["PERIOD 2", { "elements": "Li-Be-B-C-N-O-F-Ne" }], + + ["group 11 elements", { "elements": "Cu-Ag-Au-Rg" }], + ["Group 11", { "elements": "Cu-Ag-Au-Rg" }], + ["ALL GROUP 11", { "elements": "Cu-Ag-Au-Rg" }], + + ["tetrel", { "elements": "C-Si-Ge-Sn-Pb-Fl" }], + ["Tetrels", { "elements": "C-Si-Ge-Sn-Pb-Fl" }], + ["ALL tetrels", { "elements": "C-Si-Ge-Sn-Pb-Fl" }] ] From d43c7a5f7b4129542ef172daf993b61cb390345c Mon Sep 17 00:00:00 2001 From: luckylionheart Date: Fri, 17 Oct 2025 00:27:51 -0400 Subject: [PATCH 2/2] update: disable code format(prettier) --- index.js | 4120 ++++++++++++++++++++++++------------------------- test_nlp.json | 489 +++--- 2 files changed, 2226 insertions(+), 2383 deletions(-) diff --git a/index.js b/index.js index f85d004..af1bc45 100755 --- a/index.js +++ b/index.js @@ -13,893 +13,893 @@ * Own matchAll used for the chemical formulae */ function getMatchAll(inputstr, regexp) { - const matches = []; - inputstr.replace(regexp, function (...args) { - const arr = [].slice.call(args, 0), - extras = arr.splice(-2); - arr.index = extras[0]; - arr.input = extras[1]; - matches.push(arr); - }); - return matches.length ? matches : null; + const matches = []; + inputstr.replace(regexp, function (...args) { + const arr = [].slice.call(args, 0), + extras = arr.splice(-2); + arr.index = extras[0]; + arr.input = extras[1]; + matches.push(arr); + }); + return matches.length ? matches : null; } function capitalize(string) { - return string.charAt(0).toUpperCase() + string.slice(1); + return string.charAt(0).toUpperCase() + string.slice(1); } const OptimadeNLP = function () { - /* - * Definitions - */ - const stop_words = [ - 'a', - 'about', - 'above', - 'after', - 'again', - 'against', - 'all', - 'am', - 'an', - 'and', - 'any', - 'are', - "aren't", - 'as', - 'at', - 'be', - 'because', - 'been', - 'before', - 'being', - 'below', - 'between', - 'both', - 'but', - 'by', - "can't", - 'cannot', - 'could', - "couldn't", - 'did', - "didn't", - 'do', - 'does', - "doesn't", - 'doing', - "don't", - 'down', - 'during', - 'each', - 'few', - 'for', - 'from', - 'further', - 'had', - "hadn't", - 'has', - "hasn't", - 'have', - "haven't", - 'having', - 'he', - "he'd", - "he'll", - "he's", - 'her', - 'here', - "here's", - 'hers', - 'herself', - 'him', - 'himself', - 'his', - 'how', - "how's", - 'i', - "i'd", - "i'll", - "i'm", - "i've", - 'if', - 'in', - 'into', - 'is', - "isn't", - 'it', - "it's", - 'its', - 'itself', - "let's", - 'me', - 'more', - 'most', - "mustn't", - 'my', - 'myself', - 'no', - 'nor', - 'not', - 'of', - 'off', - 'on', - 'once', - 'only', - 'or', - 'other', - 'ought', - 'our', - 'ours', - 'ourselves', - 'out', - 'over', - 'own', - 'same', - "shan't", - 'she', - "she'd", - "she'll", - "she's", - 'should', - "shouldn't", - 'so', - 'some', - 'such', - 'than', - 'that', - "that's", - 'the', - 'their', - 'theirs', - 'them', - 'themselves', - 'then', - 'there', - "there's", - 'these', - 'they', - "they'd", - "they'll", - "they're", - "they've", - 'this', - 'those', - 'through', - 'to', - 'too', - 'u', - 'under', - 'until', - 'up', - 'very', - 'was', - "wasn't", - 'we', - "we'd", - "we'll", - "we're", - "we've", - 'were', - "weren't", - 'what', - "what's", - 'when', - "when's", - 'where', - "where's", - 'which', - 'while', - 'who', - "who's", - 'whom', - 'why', - "why's", - 'with', - "won't", - 'would', - "wouldn't", - 'you', - "you'd", - "you'll", - "you're", - "you've", - 'your', - 'yours', - 'yourself', - 'yourselves', - ]; /* exact */ - - const arity_keys = [ - null, - 'unary', - 'binary', - 'ternary', - 'quaternary', - 'quinary', - 'multinary', - 'multinary', - 'multinary', - 'multinary', - 'multinary', - ]; // NB null is for "0-ary" - - const periodic_elements = [ - 'h', - 'he', - 'li', - 'be', - 'b', - 'c', - 'n', - 'o', - 'f', - 'ne', - 'na', - 'mg', - 'al', - 'si', - 'p', - 's', - 'cl', - 'ar', - 'k', - 'ca', - 'sc', - 'ti', - 'v', - 'cr', - 'mn', - 'fe', - 'co', - 'ni', - 'cu', - 'zn', - 'ga', - 'ge', - 'as', - 'se', - 'br', - 'kr', - 'rb', - 'sr', - 'y', - 'zr', - 'nb', - 'mo', - 'tc', - 'ru', - 'rh', - 'pd', - 'ag', - 'cd', - 'in', - 'sn', - 'sb', - 'te', - 'i', - 'xe', - 'cs', - 'ba', - 'la', - 'ce', - 'pr', - 'nd', - 'pm', - 'sm', - 'eu', - 'gd', - 'tb', - 'dy', - 'ho', - 'er', - 'tm', - 'yb', - 'lu', - 'hf', - 'ta', - 'w', - 're', - 'os', - 'ir', - 'pt', - 'au', - 'hg', - 'tl', - 'pb', - 'bi', - 'po', - 'at', - 'rn', - 'fr', - 'ra', - 'ac', - 'th', - 'pa', - 'u', - 'np', - 'pu', - 'am', - 'cm', - 'bk', - 'cf', - 'es', - 'fm', - 'md', - 'no', - 'lr', - 'rf', - 'db', - 'sg', - 'bh', - 'hs', - 'mt', - 'ds', - 'rg', - 'cn', - 'nh', - 'fl', - 'mc', - 'lv', - 'ts', - 'og', - ]; /* exact */ - - const periodic_elements_cased = periodic_elements.map(function (x) { - return capitalize(x); - }); - - const periodic_element_names = [ - 'hydrogen', - 'helium', - 'lithium', - 'beryllium', - 'boron', - 'carbon', - 'nitrogen', - 'oxygen', - 'fluorine', - 'neon', - 'sodium', - 'magnesium', - 'aluminium', - 'silicon', - 'phosphorus', - 'sulfur', - 'chlorine', - 'argon', - 'potassium', - 'calcium', - 'scandium', - 'titanium', - 'vanadium', - 'chromium', - 'manganese', - 'iron', - 'cobalt', - 'nickel', - 'copper', - 'zinc', - 'gallium', - 'germanium', - 'arsenic', - 'selenium', - 'bromine', - 'krypton', - 'rubidium', - 'strontium', - 'yttrium', - 'zirconium', - 'niobium', - 'molybdenum', - 'technetium', - 'ruthenium', - 'rhodium', - 'palladium', - 'silver', - 'cadmium', - 'indium', - 'tin', - 'antimony', - 'tellurium', - 'iodine', - 'xenon', - 'caesium', - 'barium', - 'lanthanum', - 'cerium', - 'praseodymium', - 'neodymium', - 'promethium', - 'samarium', - 'europium', - 'gadolinium', - 'terbium', - 'dysprosium', - 'holmium', - 'erbium', - 'thulium', - 'ytterbium', - 'lutetium', - 'hafnium', - 'tantalum', - 'tungsten', - 'rhenium', - 'osmium', - 'iridium', - 'platinum', - 'gold', - 'mercury', - 'thallium', - 'lead', - 'bismuth', - 'polonium', - 'astatine', - 'radon', - 'francium', - 'radium', - 'actinium', - 'thorium', - 'protactinium', - 'uranium', - 'neptunium', - 'plutonium', - 'americium', - 'curium', - 'berkelium', - 'californium', - 'einsteinium', - 'fermium', - 'mendelevium', - 'nobelium', - 'lawrencium', - 'rutherfordium', - 'dubnium', - 'seaborgium', - 'bohrium', - 'hassium', - 'meitnerium', - 'darmstadium', - 'roentgenium', - 'copernicium', - 'nihonium', - 'flerovium', - 'moscovium', - 'livermorium', - 'tennessine', - 'oganesson', - ]; /* fuzzy */ - - const lat_p2i = { - cubic: 1, - hexagonal: 2, - trigonal: 3, - tetragonal: 4, - orthorhombic: 5, - monoclinic: 6, - triclinic: 7, - rhombohedral: 3, - cub: 1, - hex: 2, - hexag: 2, - trig: 3, - tet: 4, - tetr: 4, - tetrag: 4, - orth: 5, - ortho: 5, - monocl: 6, - tric: 7, - tricl: 7, - rhom: 3, - rhomb: 3, - }; - const lat_fgrs = Object.keys(lat_p2i); - const lat_i2p = { - 1: 'cubic', - 2: 'hexagonal', - 3: 'trigonal', - 4: 'tetragonal', - 5: 'orthorhombic', - 6: 'monoclinic', - 7: 'triclinic', - }; - - const mpds_classes = [ - 'ab initio calculations', - 'ab initio literature', - 'actinoid', - 'adamantane', - 'aegirine', - 'alkali', - 'alkaline', - 'allargentum', - 'almandine', - 'alum', - 'alunogen', - 'amide', - 'analcime', - 'anatase', - 'anorpiment', - 'anorthoclase', - 'antiferroelectric', - 'antiferromagnet', - 'antiferromagnetic', - 'arsenate', - 'arsenide', - 'ashcroftine', - 'auricupride', - 'aurocupride', - 'azide', - 'baileychlore', - 'bariopyrochlore', - 'baryte', - 'beryl', - 'beta-alumina', - 'beta-boron', - 'biguanide', - 'binary', - 'birefringent', - 'borane', - 'borate', - 'borax', - 'boride', - 'borocarbide', - 'borohydride', - 'boronitride', - 'botryogen', - 'bromanilate', - 'bromide', - 'bromoimide', - 'calomel', - 'carbamate', - 'carbide', - 'carbonate', - 'carbonyl', - 'carboxylate', - 'celestine', - 'cell and atoms', - 'cell-only', - 'celsian', - 'cesiokenopyrochlore', - 'chalcogen', - 'charge-density wave state', - 'chevrel', - 'chimney-ladder', - 'chloranilate', - 'chlorate', - 'chloride', - 'chloritoid', - 'chlorosulfate', - 'chromate', - 'chrysoberyl', - 'chrysotile', - 'cinnabar', - 'clathrate', - 'clinochlore', - 'clinoclase', - 'clodronate', - 'close-packed', - 'cluster glass', - 'colossal magnetoresistance', - 'conductor', - 'corundum', - 'cosmochlor', - 'croconate violet', - 'croconate', - 'cryptomelane', - 'cuprate', - 'cuspidine', - 'cyamelurate', - 'cyanamide', - 'cyanamidonitrate', - 'cyananilate', - 'cyanide', - 'cyanotetrazolate', - 'cyanoureate', - 'cyanurate', - 'cyprine', - 'davyne', - 'deuteride', - 'deuterium', - 'devilline', - 'diamagnetic', - 'diamond', - 'diarsenate', - 'diaspore', - 'diazanide', - 'diazenide', - 'dichromate', - 'digermanate', - 'diiodobromide', - 'dinitramide', - 'diopside', - 'dioptase', - 'dioxobromate', - 'dioxoiodate', - 'dioxosulfate', - 'dioxothiosulfate', - 'diphosphate', - 'diphosphonate', - 'dipolyhedral', - 'diselenate', - 'disilicate', - 'disordered', - 'disulfate', - 'dithiocarbamate', - 'dithiocarbonate', - 'dithionate', - 'dithiooxalate', - 'dithiophosphate', - 'dithiosquarate', - 'divanadate', - 'epidote', - 'euchlorine', - 'euclase', - 'eudialyte', - 'eulytine', - 'fermi liquid', - 'feroxihyte', - 'feroxyhyte', - 'ferrielectric', - 'ferrimagnet', - 'ferroelastic', - 'ferroelectric', - 'ferromagnet', - 'ferromagnetic', - 'fluor-schorl', - 'fluoride', - 'fluoroborate', - 'frank-kasper', - 'friauf-laves', - 'fulleride', - 'fulminate', - 'galena', - 'gamma-brass', - 'garnet', - 'giant magnetocaloric effect', - 'gismondine', - 'glaucodot', - 'glaucophane', - 'grossular', - 'guanidinate', - 'gypsum', - 'hafnon', - 'half metal', - 'halogen', - 'hard magnet', - 'harmotome', - 'haueyne', - 'heavy fermion', - 'hedyphane', - 'helimagnet', - 'helvine', - 'hexasulfate', - 'hexathionate', - 'host-guest', - 'humboldtine', - 'hydrate', - 'hydride', - 'hydroxide', - 'hypercinnabar', - 'hypophosphate', - 'ice', - 'imide', - 'iminate', - 'intercalation', - 'intermediate valence', - 'intermetallic', - 'iodate', - 'iodide', - 'ionic conductor', - 'iridium', - 'isoferroplatinum', - 'isopolyhedral', - 'isothermal section', - 'kornerupine', - 'kosmochlor', - 'lanthanoid', - 'lavendulan', - 'levyne', - 'lime', - 'liquidus projection', - 'litharge', - 'lithiophosphate', - 'luminescent', - 'machine learning', - 'machine-learning', - 'magnesiochloritoid', - 'magnetoelastic', - 'magnetoelectric', - 'manganate', - 'massicot', - 'mellitate', - 'melonate', - 'metacinnabar', - 'metal', - 'metalloid', - 'metamagnet', - 'metavoltine', - 'mica', - 'microcline', - 'microline', - 'mictomagnet', - 'minium', - 'molybdate', - 'multiferroic', - 'multinary', - 'nasicon', - 'natron', - 'natrophosphate', - 'natroxalate', - 'negative thermal expansion', - 'nepheline', - 'nickeline', - 'niobocarbide', - 'niter', - 'nitranilate', - 'nitrate', - 'nitratine', - 'nitride', - 'nitroformate', - 'noble gas', - 'non disordered', - 'non-disordered', - 'non-linear optics', - 'nonaflate', - 'nonmetal', - 'nordenskioeldine', - 'nosean', - 'olivine', - 'optically isotropic', - 'organic', - 'orpiment', - 'orthoborate', - 'orthoclase', - 'orthogermanate', - 'orthonitrate', - 'orthophosphate', - 'orthosilicate', - 'oxalate', - 'oxamate', - 'oxide', - 'oxoiodate', - 'oxonitrate', - 'oxotetrazolate', - 'oxy-schorl', - 'ozonide', - 'palladogermanide', - 'paracelsian', - 'paramagnet', - 'pararealgar', - 'pauli paramagnet', - 'peer review', - 'peer reviewed', - 'peer-review', - 'peer-reviewed', - 'perchlorate', - 'perhydrate', - 'periclase', - 'periodate', - 'permanganate', - 'pernitride', - 'peroxide', - 'pertechnetate', - 'phosphate', - 'phosphide', - 'phosphinate', - 'phosphonate', - 'photocatalyst', - 'photovoltaic effect', - 'photovoltaic', - 'piezoelectric', - 'pnictogen', - 'polaron conductor', - 'polycrase', - 'potassic', - 'prism', - 'prussian blue', - 'pseudorutile', - 'pyrochlore', - 'pyroelectric', - 'pyrope', - 'quadridavyne', - 'quartz', - 'quasicrystal', - 'quaternary', - 'quinary', - 'radioactive', - 'rare earth', - 'realgar', - 'refractory', - 'relaxor', - 'retzian', - 'rhodarsenide', - 'rhodizonate', - 'rhomboclase', - 'rocksalt', - 'rubicline', - 'ruddlesden-popper', - 'rutheniridosmine', - 'rutherfordine', - 'rutile', - 'salammoniac', - 'sanidine', - 'sapphirine', - 'sarcopside', - 'schorl', - 'selenide', - 'selenidel', - 'semiconductor', - 'semimetal', - 'shape memory effect', - 'shape memory', - 'siderotil', - 'silanide', - 'silicate', - 'silicide', - 'sillen-aurivillius', - 'skyrmion', - 'solidus projection', - 'solvus projection', - 'spessartine', - 'spin glass', - 'spinel', - 'spodumene', - 'squarate', - 'steenstrupine', - 'stibarsen', - 'subsolidus relations', - 'sulfamate', - 'sulfamide', - 'sulfate', - 'sulfide', - 'sulfidel', - 'sulfinylamide', - 'superconductor', - 'superhard', - 'superionic conductor', - 'superoxide', - 'talc', - 'tantalcarbide', - 'tellurantimony', - 'telluride', - 'ternary', - 'tetrahedron', - 'thermoelectric', - 'thiocyanate', - 'thiocyanurate', - 'thiophosphate', - 'thiosulfate', - 'topaz', - 'topological insulator', - 'tourmaline', - 'transitional', - 'triflate', - 'tripolyhedral', - 'triteride', - 'trithionate', - 'tritide', - 'tritium', - 'trona', - 'tungstate', - 'turquoise', - 'ulvoespinel', - 'unary', - 'uranophane', - 'uranopolycrase', - 'urate', - 'urea', - 'ureate', - 'van vleck paramagnet', - 'vanadate', - 'vertical section', - 'violurate', - 'xenotime', - 'zincobotryogen', - 'zircon', - 'zircosulfate', - ]; + /* + * Definitions + */ + const stop_words = [ + 'a', + 'about', + 'above', + 'after', + 'again', + 'against', + 'all', + 'am', + 'an', + 'and', + 'any', + 'are', + "aren't", + 'as', + 'at', + 'be', + 'because', + 'been', + 'before', + 'being', + 'below', + 'between', + 'both', + 'but', + 'by', + "can't", + 'cannot', + 'could', + "couldn't", + 'did', + "didn't", + 'do', + 'does', + "doesn't", + 'doing', + "don't", + 'down', + 'during', + 'each', + 'few', + 'for', + 'from', + 'further', + 'had', + "hadn't", + 'has', + "hasn't", + 'have', + "haven't", + 'having', + 'he', + "he'd", + "he'll", + "he's", + 'her', + 'here', + "here's", + 'hers', + 'herself', + 'him', + 'himself', + 'his', + 'how', + "how's", + 'i', + "i'd", + "i'll", + "i'm", + "i've", + 'if', + 'in', + 'into', + 'is', + "isn't", + 'it', + "it's", + 'its', + 'itself', + "let's", + 'me', + 'more', + 'most', + "mustn't", + 'my', + 'myself', + 'no', + 'nor', + 'not', + 'of', + 'off', + 'on', + 'once', + 'only', + 'or', + 'other', + 'ought', + 'our', + 'ours', + 'ourselves', + 'out', + 'over', + 'own', + 'same', + "shan't", + 'she', + "she'd", + "she'll", + "she's", + 'should', + "shouldn't", + 'so', + 'some', + 'such', + 'than', + 'that', + "that's", + 'the', + 'their', + 'theirs', + 'them', + 'themselves', + 'then', + 'there', + "there's", + 'these', + 'they', + "they'd", + "they'll", + "they're", + "they've", + 'this', + 'those', + 'through', + 'to', + 'too', + 'u', + 'under', + 'until', + 'up', + 'very', + 'was', + "wasn't", + 'we', + "we'd", + "we'll", + "we're", + "we've", + 'were', + "weren't", + 'what', + "what's", + 'when', + "when's", + 'where', + "where's", + 'which', + 'while', + 'who', + "who's", + 'whom', + 'why', + "why's", + 'with', + "won't", + 'would', + "wouldn't", + 'you', + "you'd", + "you'll", + "you're", + "you've", + 'your', + 'yours', + 'yourself', + 'yourselves', + ]; /* exact */ + + const arity_keys = [ + null, + 'unary', + 'binary', + 'ternary', + 'quaternary', + 'quinary', + 'multinary', + 'multinary', + 'multinary', + 'multinary', + 'multinary', + ]; // NB null is for "0-ary" + + const periodic_elements = [ + 'h', + 'he', + 'li', + 'be', + 'b', + 'c', + 'n', + 'o', + 'f', + 'ne', + 'na', + 'mg', + 'al', + 'si', + 'p', + 's', + 'cl', + 'ar', + 'k', + 'ca', + 'sc', + 'ti', + 'v', + 'cr', + 'mn', + 'fe', + 'co', + 'ni', + 'cu', + 'zn', + 'ga', + 'ge', + 'as', + 'se', + 'br', + 'kr', + 'rb', + 'sr', + 'y', + 'zr', + 'nb', + 'mo', + 'tc', + 'ru', + 'rh', + 'pd', + 'ag', + 'cd', + 'in', + 'sn', + 'sb', + 'te', + 'i', + 'xe', + 'cs', + 'ba', + 'la', + 'ce', + 'pr', + 'nd', + 'pm', + 'sm', + 'eu', + 'gd', + 'tb', + 'dy', + 'ho', + 'er', + 'tm', + 'yb', + 'lu', + 'hf', + 'ta', + 'w', + 're', + 'os', + 'ir', + 'pt', + 'au', + 'hg', + 'tl', + 'pb', + 'bi', + 'po', + 'at', + 'rn', + 'fr', + 'ra', + 'ac', + 'th', + 'pa', + 'u', + 'np', + 'pu', + 'am', + 'cm', + 'bk', + 'cf', + 'es', + 'fm', + 'md', + 'no', + 'lr', + 'rf', + 'db', + 'sg', + 'bh', + 'hs', + 'mt', + 'ds', + 'rg', + 'cn', + 'nh', + 'fl', + 'mc', + 'lv', + 'ts', + 'og' + ]; /* exact */ + + const periodic_elements_cased = periodic_elements.map(function (x) { + return capitalize(x); + }); + + const periodic_element_names = [ + 'hydrogen', + 'helium', + 'lithium', + 'beryllium', + 'boron', + 'carbon', + 'nitrogen', + 'oxygen', + 'fluorine', + 'neon', + 'sodium', + 'magnesium', + 'aluminium', + 'silicon', + 'phosphorus', + 'sulfur', + 'chlorine', + 'argon', + 'potassium', + 'calcium', + 'scandium', + 'titanium', + 'vanadium', + 'chromium', + 'manganese', + 'iron', + 'cobalt', + 'nickel', + 'copper', + 'zinc', + 'gallium', + 'germanium', + 'arsenic', + 'selenium', + 'bromine', + 'krypton', + 'rubidium', + 'strontium', + 'yttrium', + 'zirconium', + 'niobium', + 'molybdenum', + 'technetium', + 'ruthenium', + 'rhodium', + 'palladium', + 'silver', + 'cadmium', + 'indium', + 'tin', + 'antimony', + 'tellurium', + 'iodine', + 'xenon', + 'caesium', + 'barium', + 'lanthanum', + 'cerium', + 'praseodymium', + 'neodymium', + 'promethium', + 'samarium', + 'europium', + 'gadolinium', + 'terbium', + 'dysprosium', + 'holmium', + 'erbium', + 'thulium', + 'ytterbium', + 'lutetium', + 'hafnium', + 'tantalum', + 'tungsten', + 'rhenium', + 'osmium', + 'iridium', + 'platinum', + 'gold', + 'mercury', + 'thallium', + 'lead', + 'bismuth', + 'polonium', + 'astatine', + 'radon', + 'francium', + 'radium', + 'actinium', + 'thorium', + 'protactinium', + 'uranium', + 'neptunium', + 'plutonium', + 'americium', + 'curium', + 'berkelium', + 'californium', + 'einsteinium', + 'fermium', + 'mendelevium', + 'nobelium', + 'lawrencium', + 'rutherfordium', + 'dubnium', + 'seaborgium', + 'bohrium', + 'hassium', + 'meitnerium', + 'darmstadium', + 'roentgenium', + 'copernicium', + 'nihonium', + 'flerovium', + 'moscovium', + 'livermorium', + 'tennessine', + 'oganesson' + ]; /* fuzzy */ + + const lat_p2i = { + cubic: 1, + hexagonal: 2, + trigonal: 3, + tetragonal: 4, + orthorhombic: 5, + monoclinic: 6, + triclinic: 7, + rhombohedral: 3, + cub: 1, + hex: 2, + hexag: 2, + trig: 3, + tet: 4, + tetr: 4, + tetrag: 4, + orth: 5, + ortho: 5, + monocl: 6, + tric: 7, + tricl: 7, + rhom: 3, + rhomb: 3, + }; + const lat_fgrs = Object.keys(lat_p2i); + const lat_i2p = { + 1: 'cubic', + 2: 'hexagonal', + 3: 'trigonal', + 4: 'tetragonal', + 5: 'orthorhombic', + 6: 'monoclinic', + 7: 'triclinic', + }; + + const mpds_classes = [ + 'ab initio calculations', + 'ab initio literature', + 'actinoid', + 'adamantane', + 'aegirine', + 'alkali', + 'alkaline', + 'allargentum', + 'almandine', + 'alum', + 'alunogen', + 'amide', + 'analcime', + 'anatase', + 'anorpiment', + 'anorthoclase', + 'antiferroelectric', + 'antiferromagnet', + 'antiferromagnetic', + 'arsenate', + 'arsenide', + 'ashcroftine', + 'auricupride', + 'aurocupride', + 'azide', + 'baileychlore', + 'bariopyrochlore', + 'baryte', + 'beryl', + 'beta-alumina', + 'beta-boron', + 'biguanide', + 'binary', + 'birefringent', + 'borane', + 'borate', + 'borax', + 'boride', + 'borocarbide', + 'borohydride', + 'boronitride', + 'botryogen', + 'bromanilate', + 'bromide', + 'bromoimide', + 'calomel', + 'carbamate', + 'carbide', + 'carbonate', + 'carbonyl', + 'carboxylate', + 'celestine', + 'cell and atoms', + 'cell-only', + 'celsian', + 'cesiokenopyrochlore', + 'chalcogen', + 'charge-density wave state', + 'chevrel', + 'chimney-ladder', + 'chloranilate', + 'chlorate', + 'chloride', + 'chloritoid', + 'chlorosulfate', + 'chromate', + 'chrysoberyl', + 'chrysotile', + 'cinnabar', + 'clathrate', + 'clinochlore', + 'clinoclase', + 'clodronate', + 'close-packed', + 'cluster glass', + 'colossal magnetoresistance', + 'conductor', + 'corundum', + 'cosmochlor', + 'croconate violet', + 'croconate', + 'cryptomelane', + 'cuprate', + 'cuspidine', + 'cyamelurate', + 'cyanamide', + 'cyanamidonitrate', + 'cyananilate', + 'cyanide', + 'cyanotetrazolate', + 'cyanoureate', + 'cyanurate', + 'cyprine', + 'davyne', + 'deuteride', + 'deuterium', + 'devilline', + 'diamagnetic', + 'diamond', + 'diarsenate', + 'diaspore', + 'diazanide', + 'diazenide', + 'dichromate', + 'digermanate', + 'diiodobromide', + 'dinitramide', + 'diopside', + 'dioptase', + 'dioxobromate', + 'dioxoiodate', + 'dioxosulfate', + 'dioxothiosulfate', + 'diphosphate', + 'diphosphonate', + 'dipolyhedral', + 'diselenate', + 'disilicate', + 'disordered', + 'disulfate', + 'dithiocarbamate', + 'dithiocarbonate', + 'dithionate', + 'dithiooxalate', + 'dithiophosphate', + 'dithiosquarate', + 'divanadate', + 'epidote', + 'euchlorine', + 'euclase', + 'eudialyte', + 'eulytine', + 'fermi liquid', + 'feroxihyte', + 'feroxyhyte', + 'ferrielectric', + 'ferrimagnet', + 'ferroelastic', + 'ferroelectric', + 'ferromagnet', + 'ferromagnetic', + 'fluor-schorl', + 'fluoride', + 'fluoroborate', + 'frank-kasper', + 'friauf-laves', + 'fulleride', + 'fulminate', + 'galena', + 'gamma-brass', + 'garnet', + 'giant magnetocaloric effect', + 'gismondine', + 'glaucodot', + 'glaucophane', + 'grossular', + 'guanidinate', + 'gypsum', + 'hafnon', + 'half metal', + 'halogen', + 'hard magnet', + 'harmotome', + 'haueyne', + 'heavy fermion', + 'hedyphane', + 'helimagnet', + 'helvine', + 'hexasulfate', + 'hexathionate', + 'host-guest', + 'humboldtine', + 'hydrate', + 'hydride', + 'hydroxide', + 'hypercinnabar', + 'hypophosphate', + 'ice', + 'imide', + 'iminate', + 'intercalation', + 'intermediate valence', + 'intermetallic', + 'iodate', + 'iodide', + 'ionic conductor', + 'iridium', + 'isoferroplatinum', + 'isopolyhedral', + 'isothermal section', + 'kornerupine', + 'kosmochlor', + 'lanthanoid', + 'lavendulan', + 'levyne', + 'lime', + 'liquidus projection', + 'litharge', + 'lithiophosphate', + 'luminescent', + 'machine learning', + 'machine-learning', + 'magnesiochloritoid', + 'magnetoelastic', + 'magnetoelectric', + 'manganate', + 'massicot', + 'mellitate', + 'melonate', + 'metacinnabar', + 'metal', + 'metalloid', + 'metamagnet', + 'metavoltine', + 'mica', + 'microcline', + 'microline', + 'mictomagnet', + 'minium', + 'molybdate', + 'multiferroic', + 'multinary', + 'nasicon', + 'natron', + 'natrophosphate', + 'natroxalate', + 'negative thermal expansion', + 'nepheline', + 'nickeline', + 'niobocarbide', + 'niter', + 'nitranilate', + 'nitrate', + 'nitratine', + 'nitride', + 'nitroformate', + 'noble gas', + 'non disordered', + 'non-disordered', + 'non-linear optics', + 'nonaflate', + 'nonmetal', + 'nordenskioeldine', + 'nosean', + 'olivine', + 'optically isotropic', + 'organic', + 'orpiment', + 'orthoborate', + 'orthoclase', + 'orthogermanate', + 'orthonitrate', + 'orthophosphate', + 'orthosilicate', + 'oxalate', + 'oxamate', + 'oxide', + 'oxoiodate', + 'oxonitrate', + 'oxotetrazolate', + 'oxy-schorl', + 'ozonide', + 'palladogermanide', + 'paracelsian', + 'paramagnet', + 'pararealgar', + 'pauli paramagnet', + 'peer review', + 'peer reviewed', + 'peer-review', + 'peer-reviewed', + 'perchlorate', + 'perhydrate', + 'periclase', + 'periodate', + 'permanganate', + 'pernitride', + 'peroxide', + 'pertechnetate', + 'phosphate', + 'phosphide', + 'phosphinate', + 'phosphonate', + 'photocatalyst', + 'photovoltaic effect', + 'photovoltaic', + 'piezoelectric', + 'pnictogen', + 'polaron conductor', + 'polycrase', + 'potassic', + 'prism', + 'prussian blue', + 'pseudorutile', + 'pyrochlore', + 'pyroelectric', + 'pyrope', + 'quadridavyne', + 'quartz', + 'quasicrystal', + 'quaternary', + 'quinary', + 'radioactive', + 'rare earth', + 'realgar', + 'refractory', + 'relaxor', + 'retzian', + 'rhodarsenide', + 'rhodizonate', + 'rhomboclase', + 'rocksalt', + 'rubicline', + 'ruddlesden-popper', + 'rutheniridosmine', + 'rutherfordine', + 'rutile', + 'salammoniac', + 'sanidine', + 'sapphirine', + 'sarcopside', + 'schorl', + 'selenide', + 'selenidel', + 'semiconductor', + 'semimetal', + 'shape memory effect', + 'shape memory', + 'siderotil', + 'silanide', + 'silicate', + 'silicide', + 'sillen-aurivillius', + 'skyrmion', + 'solidus projection', + 'solvus projection', + 'spessartine', + 'spin glass', + 'spinel', + 'spodumene', + 'squarate', + 'steenstrupine', + 'stibarsen', + 'subsolidus relations', + 'sulfamate', + 'sulfamide', + 'sulfate', + 'sulfide', + 'sulfidel', + 'sulfinylamide', + 'superconductor', + 'superhard', + 'superionic conductor', + 'superoxide', + 'talc', + 'tantalcarbide', + 'tellurantimony', + 'telluride', + 'ternary', + 'tetrahedron', + 'thermoelectric', + 'thiocyanate', + 'thiocyanurate', + 'thiophosphate', + 'thiosulfate', + 'topaz', + 'topological insulator', + 'tourmaline', + 'transitional', + 'triflate', + 'tripolyhedral', + 'triteride', + 'trithionate', + 'tritide', + 'tritium', + 'trona', + 'tungstate', + 'turquoise', + 'ulvoespinel', + 'unary', + 'uranophane', + 'uranopolycrase', + 'urate', + 'urea', + 'ureate', + 'van vleck paramagnet', + 'vanadate', + 'vertical section', + 'violurate', + 'xenotime', + 'zincobotryogen', + 'zircon', + 'zircosulfate', + ]; // Mapping of common element groups / periods to element symbol arrays const ELEMENT_GROUPS_MAP = { @@ -1035,623 +1035,562 @@ const OptimadeNLP = function () { return null; } - const mpds_props = [ - 'acceptor concentration', - 'acceptor to donor concentration', - 'activation energy', - 'adiabatic bulk modulus', - 'angle-resolved photoelectron spectra', - 'atomic structure', - 'band gap', - 'birefringence', - 'bremsstrahlung isochromat spectra', - 'charge carrier concentration', - 'charge carrier mobility', - 'charge transfer', - 'charge-density wave', - 'charge-transfer energy', - 'coefficient of schottky term in heat capacity', - 'coercive electric field', - 'coercive field', - 'coherence length', - 'cohesive energy', - 'compressibility', - 'conductivity', - 'core-electron contribution to magnetic susceptibility', - 'critical current density', - 'critical magnetic field', - 'crystal electric field parameter', - 'crystal electric field parameters', - 'crystal electric field splitting', - 'crystal electric field', - 'crystal field level', - 'crystalline structure', - 'crystal cell', - 'crystal structure', - 'curie coefficient', - 'curie temperature', - 'curie-weiss paramagnetism', - 'debye temperature', - 'decomposition temperature', - 'decomposition', - 'diamagnetic contribution to magnetic susceptibility', - 'dielectric constant', - 'dielectric loss tangent', - 'diffusion', - 'donor concentration', - 'donor energy', - 'effective charge', - 'effective electron number', - 'effective mass of electrons to holes ratio', - 'effective mass of electrons', - 'effective mass', - 'einstein temperature', - 'elastic compliance', - 'elastic moduli', - 'elastic stiffness coefficient', - 'elasticity', - 'electric field gradient', - 'electric polarization', - 'electrical conductivity', - 'electric properties', - 'electrical properties', - 'electrical resistivity', - 'electrochemical impedance spectroscopy', - 'electron density maps', - 'electron density of states at fermi level', - 'electron density of states', - 'electron energy band structure', - 'electron energy loss spectra', - 'electron grueneisen coefficient', - 'electron mobility', - 'electron paramagnetic resonance spectra', - 'electron spin resonance spectra', - 'electron-phonon interaction parameter', - 'electronic contribution to heat capacity', - 'electronic contribution to thermal conductivity', - 'electronic energy gap', - 'electronic heat capacity coefficient', - 'electronic properties', - 'energy at fermi level', - 'energy band structure', - 'energy gap for direct transition', - 'energy gap for indirect transition', - 'energy level diagram', - 'energy of optical phonon', - 'energy product', - 'energy', - 'enthalpy change at melting point', - 'enthalpy change at phase transition', - 'enthalpy change at structural transition', - 'enthalpy change', - 'enthalpy of formation', - 'enthalpy of reaction', - 'enthalpy', - 'entropy change at melting point', - 'entropy change at phase transition', - 'entropy of formation', - 'entropy of reaction', - 'entropy', - 'eutectoid decomposition', - 'exchange field', - 'exchange interaction parameter', - 'exciton energy', - 'extended x-ray absorption fine structure', - 'extraordinary refractive index', - 'fermi energy', - 'fermi surface', - 'ferroelasticity', - 'ferroelectric curie temperature', - 'ferroelectric hysteresis', - 'ferroelectric neel temperature', - 'ferroelectric transitions', - 'field dependence of resistivity', - 'figure of merit', - 'freezing temperature for spin glass', - 'fusion', - 'gibbs energy change', - 'gibbs energy of formation', - 'gibbs energy of reaction', - 'ginzburg-landau parameter', - 'grueneisen coefficient', - 'gruneisen coefficient', - 'hall coefficient', - 'hall effect', - 'hall mobility', - 'hardness', - 'heat capacity at constant pressure', - 'heat capacity at constant volume', - 'heat capacity coefficient', - 'heat capacity discontinuity at structural transition', - 'heat capacity discontinuity at superconducting transition', - 'heat capacity discontinuity', - 'heat capacity', - 'high-frequency permittivity', - 'hole mobility', - 'hydrogen diffusion', - 'hyperfine magnetic field', - 'imaginary part of magnetic susceptibility', - 'imaginary part of permittivity', - 'inelastic neutron scattering', - 'inelastic x-ray scattering', - 'infrared spectra', - 'ionic conductivity', - 'irreversibility field', - 'isomer shift', - 'isothermal bulk modulus', - 'isothermal linear compressibility', - 'isothermal volume compressibility', - 'knoop hardness', - 'kondo behavior', - 'kondo temperature', - 'lattice', - 'linear magnetostriction', - 'linear thermal expansion coefficient', - 'longitudinal sound velocity', - 'longitudinal-mode elastic coefficient', - 'lorentz number', - 'lower critical magnetic field', - 'lowest temperature of investigation', - 'luminescence lifetime', - 'luminescence', - 'magnetic anisotropy field', - 'magnetic anisotropy', - 'magnetic circular x-ray dichroism', - 'magnetic dichroism', - 'magnetic direction', - 'magnetic entropy', - 'magnetic field for magnetic transition', - 'magnetic field for structural transition', - 'magnetic heat capacity', - 'magnetic hysteresis', - 'magnetic moment', - 'magnetic order', - 'magnetic penetration depth', - 'magnetic phase diagram', - 'magnetic properties', - 'magnetic resistivity', - 'magnetic structure', - 'magnetic susceptibility', - 'magnetic transitions', - 'magnetism', - 'magnetization', - 'magneto-optical effects', - 'magneto-optical kerr effect', - 'magnetostriction', - 'mechanical properties', - 'melting temperature', - 'microhardness', - 'moessbauer spectra', - 'mohs hardness', - 'molar volume', - 'molecular field parameter', - 'muon spin spectra', - 'neel temperature', - 'neutron energy loss spectra', - 'non-linear optical properties', - 'non-linear optics', - 'nuclear magnetic resonance spectra', - 'nuclear quadrupolar resonance spectra', - 'optical absorption coefficient', - 'optical absorption', - 'optical conductivity', - 'optical phonons', - 'optical properties', - 'optical spectra', - 'orbital magnetic moment', - 'ordinary refractive index', - 'paraelectric curie coefficient', - 'paraelectric curie temperature', - 'paraelectric state', - 'paramagnetic curie temperature', - 'paramagnetic moment', - 'pauli magnetic susceptibility', - 'peritectic formation', - 'peritectoid formation', - 'permittivity', - 'perturbed angular correlation', - 'phase diagram', - 'phase diagrams', - 'phase transitions', - 'phonon contribution to thermal conductivity', - 'phonon density of states', - 'phonon dispersion', - 'phonon grueneisen coefficient', - 'phonon heat capacity at constant pressure', - 'phonons', - 'photo-conductivity data', - 'photo-conductivity', - 'photoelectron emission spectra', - 'photoluminescence spectra', - 'physical properties', - 'piezoelectric coefficient', - 'piezoelectric coefficient', - 'piezoelectricity', - 'plasma edge', - 'poisson ratio', - 'power factor', - 'pressure derivative of adiabatic bulk modulus', - 'pressure derivative of curie temperature', - 'pressure derivative of elastic stiffness coefficient', - 'pressure derivative of energy gap', - 'pressure derivative of isothermal bulk modulus', - 'pressure derivative of neel temperature', - 'pressure derivative of superconducting transition temperature', - 'pressure derivative of transition temperature', - 'pressure for magnetic transition', - 'pressure for metal-nonmetal transition', - 'pressure for structural transition', - 'pyroelectric coefficient', - 'pyroelectricity', - 'quadrupole splitting', - 'raman spectra', - 'real part of magnetic permeability', - 'real part of magnetic susceptibility', - 'real part of optical conductivity', - 'real part of permittivity', - 'reflectivity', - 'refractive index', - 'relative cooling power', - 'remanent induction', - 'remanent magnetic field', - 'remanent magnetic moment', - 'remanent magnetization', - 'remanent polarization', - 'residual resistivity ratio', - 'residual resistivity', - 'resistivity anisotropy', - 'resistivity', - 'resonance spectra', - 'saturation magnetic moment', - 'saturation magnetization', - 'second-harmonic generation', - 'seebeck coefficient', - 'shear modulus', - 'soft-x-ray emission spectra', - 'sound velocity', - 'spin contribution to magnetic susceptibility', - 'spin magnetic moment', - 'spin-fluctuation temperature', - 'spin-fluctuation', - 'spin-orbit splitting of valence band', - 'spin-resolved electron density of states at fermi level', - 'spontaneous elastic strain', - 'spontaneous magnetic moment', - 'spontaneous magnetization', - 'spontaneous polarization', - 'static permittivity', - 'stoner enhancement factor', - 'stoner parameter', - 'stoner product', - 'structural properties', - 'structural transition', - 'structural transitions', - 'superconducting transition temperature', - 'superconductivity energy gap', - 'superconductivity phenomena', - 'superconductivity', - 'temperature dependence of resistivity', - 'temperature dependence of static permittivity', - 'temperature derivative of elastic stiffness coefficient', - 'temperature derivative of energy gap', - 'temperature derivative of resistivity', - 'temperature derivative of upper critical magnetic field', - 'temperature for congruent melting', - 'temperature for eutectoid decomposition', - 'temperature for ferroelectric reordering', - 'temperature for magnetic transition', - 'temperature for metal-nonmetal transition', - 'temperature for peritectic formation', - 'temperature for peritectoid formation', - 'temperature for structural transition', - 'temperature-independent part of magnetic susceptibility', - 'thermal cell parameters change', - 'thermal conductivity', - 'thermal energy gap', - 'thermal expansion', - 'thermal properties', - 'thermal strain', - 'thermodynamic properties', - 'thermodynamics', - 'thermoelectric figure of merit', - 'thermoelectric power', - 'total energy calculation data', - 'transmittance', - 'transverse sound velocity', - 'type of magnetism', - 'upper critical magnetic field', - 'vacuum ultraviolet photoemission spectra', - 'valence', - 'van vleck contribution to magnetic susceptibility', - 'vibrational spectra', - 'vickers hardness number', - 'volume change at phase transition', - 'volume change at structural transition', - 'volume change', - 'volume magnetostriction', - 'volume thermal expansion coefficient', - 'wavelength for luminescence', - 'wavenumber of longitudinal optical phonon', - 'wavenumber of optical phonon', - 'wavenumber of transverse optical phonon', - 'work function', - 'x-ray absorption near-edge spectra', - 'x-ray absorption spectra', - 'x-ray photoemission spectra', - 'young modulus', - ]; - - /* - * Methods - */ - function is_numeric(n) { - return !isNaN(parseFloat(n)) && isFinite(n); - } - - /* - * Fix chemical formula if needed - */ - function termify_formulae(input, charred) { - if (input.includes('&#')) charred = true; - const re = charred ? /Ȉ(\d);/g : /%u208(\d)/g; - input = charred ? input : escape(input); - const matches = getMatchAll(input, re); - if (matches) { - for (let i = 0; i < matches.length; i++) { - input = input.replace(matches[i][0], matches[i][1]); - } - } - return unescape(input).replace(/^\(|\)$/g, ''); - } - - /* - * User input processing: brute-force similarity check - */ - function is_like_chem_formula(chk) { - //console.log('Checking formula'); - const len = chk.length; - - let checks; - - if (len > 10) return false; - // this cannot be no-index chemical formula - else if (len === 2) { - checks = [[chk.substr(0, 1), chk.substr(1, 1)]]; - } else if (len === 3) { - checks = [ - [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 1)], - [chk.substr(0, 1), chk.substr(1, 2)], - [chk.substr(0, 2), chk.substr(2, 1)], - ]; - } else if (len === 4) { - checks = [ - [chk.substr(0, 2), chk.substr(2, 2)], - [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 1)], - [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 2)], - [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 1)], - [ - chk.substr(0, 1), - chk.substr(1, 1), - chk.substr(2, 1), - chk.substr(3, 1), - ], - ]; - } else if (len === 5) { - checks = [ - [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 1)], - [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 2)], - [chk.substr(0, 2), chk.substr(2, 2), chk.substr(4, 1)], - [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 2)], - [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 1)], - [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 1)], - [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 2)], - ]; - } else { - // 6-9 - checks = [ - // NB too improbable to have 5 one-symbol elements in row - [chk.substr(0, 2), chk.substr(2, 2), chk.substr(4, 2)], // El-El-El - [ - chk.substr(0, 2), - chk.substr(2, 1), - chk.substr(3, 1), - chk.substr(4, 2), - ], // El-E-E-El - [ - chk.substr(0, 2), - chk.substr(2, 1), - chk.substr(3, 1), - chk.substr(4, 1), - chk.substr(5, 1), - ], // El-E-E-E-E - [ - chk.substr(0, 2), - chk.substr(2, 2), - chk.substr(4, 1), - chk.substr(5, 1), - ], // El-El-E-E - [ - chk.substr(0, 2), - chk.substr(2, 2), - chk.substr(4, 1), - chk.substr(5, 2), - ], // El-El-E-El - [ - chk.substr(0, 2), - chk.substr(2, 1), - chk.substr(3, 2), - chk.substr(5, 1), - ], // El-E-El-E - [ - chk.substr(0, 2), - chk.substr(2, 1), - chk.substr(3, 2), - chk.substr(5, 2), - ], // El-E-El-El - [ - chk.substr(0, 1), - chk.substr(1, 2), - chk.substr(3, 1), - chk.substr(4, 2), - ], // E-El-E-El - [ - chk.substr(0, 1), - chk.substr(1, 2), - chk.substr(3, 1), - chk.substr(4, 1), - chk.substr(5, 1), - ], // E-El-E-E-E - [ - chk.substr(0, 1), - chk.substr(1, 2), - chk.substr(3, 2), - chk.substr(5, 1), - ], // E-El-El-E - [ - chk.substr(0, 1), - chk.substr(1, 2), - chk.substr(3, 2), - chk.substr(5, 2), - ], // E-El-El-El - [ - chk.substr(0, 1), - chk.substr(1, 1), - chk.substr(2, 2), - chk.substr(4, 1), - chk.substr(5, 1), - ], // E-E-El-E-E - [ - chk.substr(0, 1), - chk.substr(1, 1), - chk.substr(2, 2), - chk.substr(4, 2), - ], // E-E-El-El - [ - chk.substr(0, 1), - chk.substr(1, 1), - chk.substr(2, 1), - chk.substr(3, 2), - chk.substr(5, 1), - ], // E-E-E-El-E - [ - chk.substr(0, 1), - chk.substr(1, 1), - chk.substr(2, 1), - chk.substr(3, 1), - chk.substr(4, 2), - ], // E-E-E-E-El - ]; - } - //console.log(checks); - - for (let i = 0; i < checks.length; i++) { - let signals = 0; - for (let j = 0; j < checks[i].length; j++) { - if (periodic_elements_cased.includes(checks[i][j])) signals++; - - if (signals === checks[i].length) { - //console.log(checks[i]); - return true; - } - } - } - return false; - } - - /* - * Detect facets: formulae, elements, lattices, and some classes - */ - function try_uniword_facet(term) { - if (term === 'AB' || term === 'ABC' || term === 'ABCD') return ['formulae']; // special case-sensitive anonymous cases - - term = term.toLowerCase(); - - const maybe_formula = !is_numeric(term.charAt(0)); - - const dmatches = getMatchAll(term, /(\d)/g); - if (dmatches && dmatches.length > 1 && maybe_formula) return ['formulae']; // no props with more than one digit - - const imatches = getMatchAll(escape(term), /%u208(\d)/g); - if (imatches && imatches.length && maybe_formula) return ['formulae']; // no props with subscripts - - if (periodic_elements.includes(term)) return ['elements', capitalize(term)]; - else if (periodic_element_names.includes(term)) - return [ - 'elements', - capitalize(periodic_elements[periodic_element_names.indexOf(term)]), - ]; - - if ( - term.includes('-') && - !term.split('-').some((part) => !periodic_elements.includes(part)) - ) { - return [ - 'elements', - term - .split('-') - .map((el) => capitalize(el)) - .join('-'), - ]; - } - - if (['element', 'elementary', 'unitary'].includes(term)) - return ['classes', 'unary']; - else if ( - term === 'quintenary' || - term === 'quinternary' || - term === 'quinternaries' || - term === 'quinaries' || - term === 'pentanary' || - term === 'pentanaries' - ) - return ['classes', 'quinary']; - else if (term === 'actinide' || term === 'actinides') - return ['classes', 'actinoid']; - else if ( - term === 'lantanide' || - term === 'lantanides' || - term === 'lanthanide' || - term === 'lanthanides' || - term === 'lantanoid' || - term === 'lantanoids' - ) - return ['classes', 'lanthanoid']; - else if (term.endsWith('ite') && term.length > 4) return ['classes']; + const mpds_props = [ + 'acceptor concentration', + 'acceptor to donor concentration', + 'activation energy', + 'adiabatic bulk modulus', + 'angle-resolved photoelectron spectra', + 'atomic structure', + 'band gap', + 'birefringence', + 'bremsstrahlung isochromat spectra', + 'charge carrier concentration', + 'charge carrier mobility', + 'charge transfer', + 'charge-density wave', + 'charge-transfer energy', + 'coefficient of schottky term in heat capacity', + 'coercive electric field', + 'coercive field', + 'coherence length', + 'cohesive energy', + 'compressibility', + 'conductivity', + 'core-electron contribution to magnetic susceptibility', + 'critical current density', + 'critical magnetic field', + 'crystal electric field parameter', + 'crystal electric field parameters', + 'crystal electric field splitting', + 'crystal electric field', + 'crystal field level', + 'crystalline structure', + 'crystal cell', + 'crystal structure', + 'curie coefficient', + 'curie temperature', + 'curie-weiss paramagnetism', + 'debye temperature', + 'decomposition temperature', + 'decomposition', + 'diamagnetic contribution to magnetic susceptibility', + 'dielectric constant', + 'dielectric loss tangent', + 'diffusion', + 'donor concentration', + 'donor energy', + 'effective charge', + 'effective electron number', + 'effective mass of electrons to holes ratio', + 'effective mass of electrons', + 'effective mass', + 'einstein temperature', + 'elastic compliance', + 'elastic moduli', + 'elastic stiffness coefficient', + 'elasticity', + 'electric field gradient', + 'electric polarization', + 'electrical conductivity', + 'electric properties', + 'electrical properties', + 'electrical resistivity', + 'electrochemical impedance spectroscopy', + 'electron density maps', + 'electron density of states at fermi level', + 'electron density of states', + 'electron energy band structure', + 'electron energy loss spectra', + 'electron grueneisen coefficient', + 'electron mobility', + 'electron paramagnetic resonance spectra', + 'electron spin resonance spectra', + 'electron-phonon interaction parameter', + 'electronic contribution to heat capacity', + 'electronic contribution to thermal conductivity', + 'electronic energy gap', + 'electronic heat capacity coefficient', + 'electronic properties', + 'energy at fermi level', + 'energy band structure', + 'energy gap for direct transition', + 'energy gap for indirect transition', + 'energy level diagram', + 'energy of optical phonon', + 'energy product', + 'energy', + 'enthalpy change at melting point', + 'enthalpy change at phase transition', + 'enthalpy change at structural transition', + 'enthalpy change', + 'enthalpy of formation', + 'enthalpy of reaction', + 'enthalpy', + 'entropy change at melting point', + 'entropy change at phase transition', + 'entropy of formation', + 'entropy of reaction', + 'entropy', + 'eutectoid decomposition', + 'exchange field', + 'exchange interaction parameter', + 'exciton energy', + 'extended x-ray absorption fine structure', + 'extraordinary refractive index', + 'fermi energy', + 'fermi surface', + 'ferroelasticity', + 'ferroelectric curie temperature', + 'ferroelectric hysteresis', + 'ferroelectric neel temperature', + 'ferroelectric transitions', + 'field dependence of resistivity', + 'figure of merit', + 'freezing temperature for spin glass', + 'fusion', + 'gibbs energy change', + 'gibbs energy of formation', + 'gibbs energy of reaction', + 'ginzburg-landau parameter', + 'grueneisen coefficient', + 'gruneisen coefficient', + 'hall coefficient', + 'hall effect', + 'hall mobility', + 'hardness', + 'heat capacity at constant pressure', + 'heat capacity at constant volume', + 'heat capacity coefficient', + 'heat capacity discontinuity at structural transition', + 'heat capacity discontinuity at superconducting transition', + 'heat capacity discontinuity', + 'heat capacity', + 'high-frequency permittivity', + 'hole mobility', + 'hydrogen diffusion', + 'hyperfine magnetic field', + 'imaginary part of magnetic susceptibility', + 'imaginary part of permittivity', + 'inelastic neutron scattering', + 'inelastic x-ray scattering', + 'infrared spectra', + 'ionic conductivity', + 'irreversibility field', + 'isomer shift', + 'isothermal bulk modulus', + 'isothermal linear compressibility', + 'isothermal volume compressibility', + 'knoop hardness', + 'kondo behavior', + 'kondo temperature', + 'lattice', + 'linear magnetostriction', + 'linear thermal expansion coefficient', + 'longitudinal sound velocity', + 'longitudinal-mode elastic coefficient', + 'lorentz number', + 'lower critical magnetic field', + 'lowest temperature of investigation', + 'luminescence lifetime', + 'luminescence', + 'magnetic anisotropy field', + 'magnetic anisotropy', + 'magnetic circular x-ray dichroism', + 'magnetic dichroism', + 'magnetic direction', + 'magnetic entropy', + 'magnetic field for magnetic transition', + 'magnetic field for structural transition', + 'magnetic heat capacity', + 'magnetic hysteresis', + 'magnetic moment', + 'magnetic order', + 'magnetic penetration depth', + 'magnetic phase diagram', + 'magnetic properties', + 'magnetic resistivity', + 'magnetic structure', + 'magnetic susceptibility', + 'magnetic transitions', + 'magnetism', + 'magnetization', + 'magneto-optical effects', + 'magneto-optical kerr effect', + 'magnetostriction', + 'mechanical properties', + 'melting temperature', + 'microhardness', + 'moessbauer spectra', + 'mohs hardness', + 'molar volume', + 'molecular field parameter', + 'muon spin spectra', + 'neel temperature', + 'neutron energy loss spectra', + 'non-linear optical properties', + 'non-linear optics', + 'nuclear magnetic resonance spectra', + 'nuclear quadrupolar resonance spectra', + 'optical absorption coefficient', + 'optical absorption', + 'optical conductivity', + 'optical phonons', + 'optical properties', + 'optical spectra', + 'orbital magnetic moment', + 'ordinary refractive index', + 'paraelectric curie coefficient', + 'paraelectric curie temperature', + 'paraelectric state', + 'paramagnetic curie temperature', + 'paramagnetic moment', + 'pauli magnetic susceptibility', + 'peritectic formation', + 'peritectoid formation', + 'permittivity', + 'perturbed angular correlation', + 'phase diagram', + 'phase diagrams', + 'phase transitions', + 'phonon contribution to thermal conductivity', + 'phonon density of states', + 'phonon dispersion', + 'phonon grueneisen coefficient', + 'phonon heat capacity at constant pressure', + 'phonons', + 'photo-conductivity data', + 'photo-conductivity', + 'photoelectron emission spectra', + 'photoluminescence spectra', + 'physical properties', + 'piezoelectric coefficient', + 'piezoelectric coefficient', + 'piezoelectricity', + 'plasma edge', + 'poisson ratio', + 'power factor', + 'pressure derivative of adiabatic bulk modulus', + 'pressure derivative of curie temperature', + 'pressure derivative of elastic stiffness coefficient', + 'pressure derivative of energy gap', + 'pressure derivative of isothermal bulk modulus', + 'pressure derivative of neel temperature', + 'pressure derivative of superconducting transition temperature', + 'pressure derivative of transition temperature', + 'pressure for magnetic transition', + 'pressure for metal-nonmetal transition', + 'pressure for structural transition', + 'pyroelectric coefficient', + 'pyroelectricity', + 'quadrupole splitting', + 'raman spectra', + 'real part of magnetic permeability', + 'real part of magnetic susceptibility', + 'real part of optical conductivity', + 'real part of permittivity', + 'reflectivity', + 'refractive index', + 'relative cooling power', + 'remanent induction', + 'remanent magnetic field', + 'remanent magnetic moment', + 'remanent magnetization', + 'remanent polarization', + 'residual resistivity ratio', + 'residual resistivity', + 'resistivity anisotropy', + 'resistivity', + 'resonance spectra', + 'saturation magnetic moment', + 'saturation magnetization', + 'second-harmonic generation', + 'seebeck coefficient', + 'shear modulus', + 'soft-x-ray emission spectra', + 'sound velocity', + 'spin contribution to magnetic susceptibility', + 'spin magnetic moment', + 'spin-fluctuation temperature', + 'spin-fluctuation', + 'spin-orbit splitting of valence band', + 'spin-resolved electron density of states at fermi level', + 'spontaneous elastic strain', + 'spontaneous magnetic moment', + 'spontaneous magnetization', + 'spontaneous polarization', + 'static permittivity', + 'stoner enhancement factor', + 'stoner parameter', + 'stoner product', + 'structural properties', + 'structural transition', + 'structural transitions', + 'superconducting transition temperature', + 'superconductivity energy gap', + 'superconductivity phenomena', + 'superconductivity', + 'temperature dependence of resistivity', + 'temperature dependence of static permittivity', + 'temperature derivative of elastic stiffness coefficient', + 'temperature derivative of energy gap', + 'temperature derivative of resistivity', + 'temperature derivative of upper critical magnetic field', + 'temperature for congruent melting', + 'temperature for eutectoid decomposition', + 'temperature for ferroelectric reordering', + 'temperature for magnetic transition', + 'temperature for metal-nonmetal transition', + 'temperature for peritectic formation', + 'temperature for peritectoid formation', + 'temperature for structural transition', + 'temperature-independent part of magnetic susceptibility', + 'thermal cell parameters change', + 'thermal conductivity', + 'thermal energy gap', + 'thermal expansion', + 'thermal properties', + 'thermal strain', + 'thermodynamic properties', + 'thermodynamics', + 'thermoelectric figure of merit', + 'thermoelectric power', + 'total energy calculation data', + 'transmittance', + 'transverse sound velocity', + 'type of magnetism', + 'upper critical magnetic field', + 'vacuum ultraviolet photoemission spectra', + 'valence', + 'van vleck contribution to magnetic susceptibility', + 'vibrational spectra', + 'vickers hardness number', + 'volume change at phase transition', + 'volume change at structural transition', + 'volume change', + 'volume magnetostriction', + 'volume thermal expansion coefficient', + 'wavelength for luminescence', + 'wavenumber of longitudinal optical phonon', + 'wavenumber of optical phonon', + 'wavenumber of transverse optical phonon', + 'work function', + 'x-ray absorption near-edge spectra', + 'x-ray absorption spectra', + 'x-ray photoemission spectra', + 'young modulus', + ]; + + /* + * Methods + */ + function is_numeric(n) { + return !isNaN(parseFloat(n)) && isFinite(n); + } + + /* + * Fix chemical formula if needed + */ + function termify_formulae(input, charred) { + if (input.includes('&#')) charred = true; + const re = charred ? /Ȉ(\d);/g : /%u208(\d)/g; + input = charred ? input : escape(input); + const matches = getMatchAll(input, re); + if (matches) { + for (let i = 0; i < matches.length; i++) { + input = input.replace(matches[i][0], matches[i][1]); + } + } + return unescape(input).replace(/^\(|\)$/g, ''); + } + + /* + * User input processing: brute-force similarity check + */ + function is_like_chem_formula(chk) { + //console.log('Checking formula'); + const len = chk.length; + + let checks; + + if (len > 10) return false; + // this cannot be no-index chemical formula + else if (len === 2) { + checks = [[chk.substr(0, 1), chk.substr(1, 1)]]; + } else if (len === 3) { + checks = [ + [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 1)], + [chk.substr(0, 1), chk.substr(1, 2)], + [chk.substr(0, 2), chk.substr(2, 1)], + ]; + } else if (len === 4) { + checks = [ + [chk.substr(0, 2), chk.substr(2, 2)], + [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 1)], + [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 2)], + [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 1)], + [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 1), chk.substr(3, 1)], + ]; + } else if (len === 5) { + checks = [ + [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 1)], + [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 2)], + [chk.substr(0, 2), chk.substr(2, 2), chk.substr(4, 1)], + [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 2)], + [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 1)], + [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 1)], + [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 2)], + ]; + } else { + // 6-9 + checks = [ + // NB too improbable to have 5 one-symbol elements in row + [chk.substr(0, 2), chk.substr(2, 2), chk.substr(4, 2)], // El-El-El + [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 1), chk.substr(4, 2)], // El-E-E-El + [ + chk.substr(0, 2), + chk.substr(2, 1), + chk.substr(3, 1), + chk.substr(4, 1), + chk.substr(5, 1), + ], // El-E-E-E-E + [chk.substr(0, 2), chk.substr(2, 2), chk.substr(4, 1), chk.substr(5, 1)], // El-El-E-E + [chk.substr(0, 2), chk.substr(2, 2), chk.substr(4, 1), chk.substr(5, 2)], // El-El-E-El + [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 2), chk.substr(5, 1)], // El-E-El-E + [chk.substr(0, 2), chk.substr(2, 1), chk.substr(3, 2), chk.substr(5, 2)], // El-E-El-El + [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 1), chk.substr(4, 2)], // E-El-E-El + [ + chk.substr(0, 1), + chk.substr(1, 2), + chk.substr(3, 1), + chk.substr(4, 1), + chk.substr(5, 1), + ], // E-El-E-E-E + [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 2), chk.substr(5, 1)], // E-El-El-E + [chk.substr(0, 1), chk.substr(1, 2), chk.substr(3, 2), chk.substr(5, 2)], // E-El-El-El + [ + chk.substr(0, 1), + chk.substr(1, 1), + chk.substr(2, 2), + chk.substr(4, 1), + chk.substr(5, 1), + ], // E-E-El-E-E + [chk.substr(0, 1), chk.substr(1, 1), chk.substr(2, 2), chk.substr(4, 2)], // E-E-El-El + [ + chk.substr(0, 1), + chk.substr(1, 1), + chk.substr(2, 1), + chk.substr(3, 2), + chk.substr(5, 1), + ], // E-E-E-El-E + [ + chk.substr(0, 1), + chk.substr(1, 1), + chk.substr(2, 1), + chk.substr(3, 1), + chk.substr(4, 2), + ], // E-E-E-E-El + ]; + } + //console.log(checks); + + for (let i = 0; i < checks.length; i++) { + let signals = 0; + for (let j = 0; j < checks[i].length; j++) { + if (periodic_elements_cased.includes(checks[i][j])) signals++; + + if (signals === checks[i].length) { + //console.log(checks[i]); + return true; + } + } + } + return false; + } + + /* + * Detect facets: formulae, elements, lattices, and some classes + */ + function try_uniword_facet(term) { + if (term === 'AB' || term === 'ABC' || term === 'ABCD') return ['formulae']; // special case-sensitive anonymous cases + + term = term.toLowerCase(); + + const maybe_formula = !is_numeric(term.charAt(0)); + + const dmatches = getMatchAll(term, /(\d)/g); + if (dmatches && dmatches.length > 1 && maybe_formula) return ['formulae']; // no props with more than one digit + + const imatches = getMatchAll(escape(term), /%u208(\d)/g); + if (imatches && imatches.length && maybe_formula) return ['formulae']; // no props with subscripts + + if (periodic_elements.includes(term)) return ['elements', capitalize(term)]; + else if (periodic_element_names.includes(term)) + return [ 'elements', capitalize(periodic_elements[periodic_element_names.indexOf(term)]) ]; + + if ( + term.includes('-') && + !term.split('-').some((part) => !periodic_elements.includes(part)) + ) { + return [ 'elements', term.split('-').map((el) => capitalize(el)).join('-') ]; + } + + if (['element', 'elementary', 'unitary'].includes(term)) return ['classes', 'unary']; + else if ( + term === 'quintenary' || + term === 'quinternary' || + term === 'quinternaries' || + term === 'quinaries' || + term === 'pentanary' || + term === 'pentanaries' + ) + return ['classes', 'quinary']; + else if (term === 'actinide' || term === 'actinides') return ['classes', 'actinoid']; + else if ( + term === 'lantanide' || + term === 'lantanides' || + term === 'lanthanide' || + term === 'lanthanides' || + term === 'lantanoid' || + term === 'lantanoids' + ) + return ['classes', 'lanthanoid']; + else if (term.endsWith('ite') && term.length > 4) return ['classes']; // direct element-group single-word matches (tetrels, triels, chalcogen, etc.) const groupEls = getGroupElements(term); if (groupEls) return ['elements', groupEls.join('-')]; - const chk = term - .replace(' structure', '') - .replace(' lattice', '') - .replace(' crystalline', '') - .replace(' crystal', ''); - if (lat_fgrs.includes(chk)) return ['lattices', lat_i2p[lat_p2i[chk]]]; - - if (term.length <= 9 && dmatches && maybe_formula) return ['formulae']; // no SHORT props with digits (NB L0, E1) - - return false; - } - - /* - * Detect facets: classes, props - */ - function try_multiword_facet(term, queue) { - term = term.toLowerCase(); - - let candidate = false, - combined = false, - orig = false; - - if (queue.length) { - orig = term; - combined = true; - queue.forEach(function ({ input }) { - term = `${input} ${term}`; - }); - } - //console.log("CHECKING TERM FOR MULTI-FACET: "+term); + const chk = term + .replace(' structure', '') + .replace(' lattice', '') + .replace(' crystalline', '') + .replace(' crystal', ''); + if (lat_fgrs.includes(chk)) return ['lattices', lat_i2p[lat_p2i[chk]]]; + + if (term.length <= 9 && dmatches && maybe_formula) return ['formulae']; // no SHORT props with digits (NB L0, E1) + + return false; + } + + /* + * Detect facets: classes, props + */ + function try_multiword_facet(term, queue) { + term = term.toLowerCase(); + + let candidate = false, + combined = false, + orig = false; + + if (queue.length) { + orig = term; + combined = true; + queue.forEach(function ({ input }) { + term = `${input} ${term}`; + }); + } + //console.log("CHECKING TERM FOR MULTI-FACET: "+term); // Special handling: phrases like 'period 2' or 'group 11' const pgMatch = term.match(/^(period|group)\s+(\d{1,2})$/); @@ -1663,208 +1602,206 @@ const OptimadeNLP = function () { if (els) return { facet: 'elements', input: els.join('-'), ready: 1 }; } - candidate = check_category(term, 'classes'); - if (candidate) { - if (combined) candidate.combined = true; - return candidate; - } - - candidate = check_category(term, 'props'); - if (candidate) { - if (combined) candidate.combined = true; - return candidate; - } - - let single_chk; - if (term.endsWith('s')) { - // plural-singular fixups - single_chk = term.substr(0, term.length - 1); - - if (!combined && single_chk.endsWith('ite')) - return { facet: 'classes', input: single_chk, ready: 1 }; - - candidate = check_category(single_chk, 'classes'); - if (candidate) { - if (combined) candidate.combined = true; - return candidate; - } - } - if (term.endsWith('es')) { - // plural-singular fixups - single_chk = term.substr(0, term.length - 2); - - if (single_chk === 'binari') single_chk = 'binary'; - else if (single_chk === 'ternari') single_chk = 'ternary'; - else if (single_chk === 'quaternari') single_chk = 'quaternary'; - - candidate = check_category(single_chk, 'classes'); - if (candidate) { - if (combined) candidate.combined = true; - return candidate; - } - } - - if (!candidate && orig) return try_multiword_facet(orig, []); - return false; - } - - /* - * Utility algo - */ - function check_category(term, category) { - const host = category === 'classes' ? mpds_classes : mpds_props; - - if (host.includes(term)) return { facet: category, input: term, ready: 1 }; - - const len = host.length; - const re = new RegExp(`(?:^|\\s)(${term})(?=\\s|$)`); - let idx; - for (let i = 0; i < len; i++) { - idx = host[i].search(re); - if (idx === 0) { - //console.log("Found unstrict match in "+category+" with <"+host[i]+">"); - return { facet: category, input: term, anew: 1 }; - } - } - return false; - } - - /* - * Chemical formula: ABC3 vs. SrTiO3 - */ - function is_formula_anonymous(formula) { - const detect = new RegExp( - /A(\d{0,3})B(\d{0,3})(C(\d{0,3})(D(\d{0,3}))?)?\b/ - ); - return formula.charAt(0) === 'A' && detect.test(formula); - } - - /* - * Get center and ligand information from a string - */ - function parse_ligand(string, start) { - const center = string.slice(0, start).toLowerCase(); - - if ( - string.slice(start, start + 1).toLowerCase() === 'x' && - string.slice(start, start + 2).toLowerCase() !== 'xe' - ) { - if (string.slice(start).length === 1) return [center, 'X']; - - return [center, 'X' + string.slice(start + 1)]; - } - - if (string.length === start) return [center, 'X']; - - const remainder = string.slice(start); - - if (is_numeric(remainder.slice(0, 1)) && start === 2) - return parse_ligand(string, 1); - - return [center, capitalize(remainder)]; - } - - /* - * Get center and ligand information from a string - */ - function _parse_aeatoms(string) { - const pos = string.indexOf('-'); - - if (pos !== -1) { - const center = string.slice(0, pos), - ligand = string.slice(pos + 1); - - if (center.length > 2) return false; - - return parse_ligand(center + ligand, center.length); - } - - const trials = [2, 1], - periodic_elements_xed = ['x'].concat(periodic_elements); - - for (let start = 0; start < 2; start++) { - if ( - string.length >= trials[start] && - periodic_elements_xed.indexOf( - string.slice(0, trials[start]).toLowerCase() - ) !== -1 - ) { - return parse_ligand(string, trials[start]); - } - } - return false; - } - - /** - * Get center and ligand information from a string - */ - function parse_aeatoms(string) { - const parsed = _parse_aeatoms(string); - - if (!parsed) return ['?', '?']; - - return [capitalize(parsed[0]), formula_to_tags(parsed[1])]; - } - - /* - * Add HTML tags to a chemical formula as a string - */ - function formula_to_tags(string) { - let sub = false, - html_formula = ''; - - for (let i = 0, len = string.length; i < len; i++) { - if (is_numeric(string[i]) || string[i] === '.') { - if (!sub) { - html_formula += ''; - sub = true; - } - } else { - if (sub) { - html_formula += ''; - sub = false; - } - } - html_formula += string[i]; - } - if (sub) html_formula += ''; - return html_formula; - } - - /* - * User input processing: main algorithm - */ - function guess(inputstr) { - // *pseudo_numerics* - /*if (inputstr.includes('c/a ') || inputstr.includes('a/b ') || inputstr.includes('b/c ')) { + candidate = check_category(term, 'classes'); + if (candidate) { + if (combined) candidate.combined = true; + return candidate; + } + + candidate = check_category(term, 'props'); + if (candidate) { + if (combined) candidate.combined = true; + return candidate; + } + + let single_chk; + if (term.endsWith('s')) { + // plural-singular fixups + single_chk = term.substr(0, term.length - 1); + + if (!combined && single_chk.endsWith('ite')) + return { facet: 'classes', input: single_chk, ready: 1 }; + + candidate = check_category(single_chk, 'classes'); + if (candidate) { + if (combined) candidate.combined = true; + return candidate; + } + } + if (term.endsWith('es')) { + // plural-singular fixups + single_chk = term.substr(0, term.length - 2); + + if (single_chk === 'binari') single_chk = 'binary'; + else if (single_chk === 'ternari') single_chk = 'ternary'; + else if (single_chk === 'quaternari') single_chk = 'quaternary'; + + candidate = check_category(single_chk, 'classes'); + if (candidate) { + if (combined) candidate.combined = true; + return candidate; + } + } + + if (!candidate && orig) return try_multiword_facet(orig, []); + return false; + } + + /* + * Utility algo + */ + function check_category(term, category) { + const host = category === 'classes' ? mpds_classes : mpds_props; + + if (host.includes(term)) return { facet: category, input: term, ready: 1 }; + + const len = host.length; + const re = new RegExp(`(?:^|\\s)(${term})(?=\\s|$)`); + let idx; + for (let i = 0; i < len; i++) { + idx = host[i].search(re); + if (idx === 0) { + //console.log("Found unstrict match in "+category+" with <"+host[i]+">"); + return { facet: category, input: term, anew: 1 }; + } + } + return false; + } + + /* + * Chemical formula: ABC3 vs. SrTiO3 + */ + function is_formula_anonymous(formula) { + const detect = new RegExp(/A(\d{0,3})B(\d{0,3})(C(\d{0,3})(D(\d{0,3}))?)?\b/); + return formula.charAt(0) === 'A' && detect.test(formula); + } + + /* + * Get center and ligand information from a string + */ + function parse_ligand(string, start) { + + const center = string.slice(0, start).toLowerCase(); + + if (string.slice(start, start + 1).toLowerCase() === 'x' && string.slice(start, start + 2).toLowerCase() !== 'xe'){ + if (string.slice(start).length === 1) return [center, 'X']; + + return [center, 'X' + string.slice(start + 1)]; + } + + if (string.length === start) return [center, 'X']; + + const remainder = string.slice(start); + + if (is_numeric(remainder.slice(0, 1)) && start === 2) + return parse_ligand(string, 1); + + return [center, capitalize(remainder)]; + } + + /* + * Get center and ligand information from a string + */ + function _parse_aeatoms(string) { + + const pos = string.indexOf('-'); + + if (pos !== -1){ + const center = string.slice(0, pos), + ligand = string.slice(pos + 1); + + if (center.length > 2) return false; + + return parse_ligand(center + ligand, center.length); + } + + const trials = [2, 1], + periodic_elements_xed = ["x"].concat(periodic_elements); + + for (let start = 0; start < 2; start++){ + if (string.length >= trials[start] && periodic_elements_xed.indexOf(string.slice(0, trials[start]).toLowerCase()) !== -1){ + return parse_ligand(string, trials[start]); + } + } + return false; + } + + /** + * Get center and ligand information from a string + */ + function parse_aeatoms(string) { + + const parsed = _parse_aeatoms(string); + + if (!parsed) return ['?', '?']; + + return [ + capitalize(parsed[0]), + formula_to_tags(parsed[1]) + ]; + } + + /* + * Add HTML tags to a chemical formula as a string + */ + function formula_to_tags(string) { + + let sub = false, + html_formula = ''; + + for (let i = 0, len = string.length; i < len; i++){ + if (is_numeric(string[i]) || string[i] === '.'){ + if (!sub){ + html_formula += ''; + sub = true; + } + } else { + if (sub){ + html_formula += ''; + sub = false; + } + } + html_formula += string[i]; + } + if (sub) html_formula += ''; + return html_formula; + } + + /* + * User input processing: main algorithm + */ + function guess(inputstr) { + + // *pseudo_numerics* + /*if (inputstr.includes('c/a ') || inputstr.includes('a/b ') || inputstr.includes('b/c ')) { // FIXME slashes in names if (inputstr.includes('c/a ')) inputstr = inputstr.replace('c/a ', 'c--a '); if (inputstr.includes('a/b ')) inputstr = inputstr.replace('a/b ', 'a--b '); if (inputstr.includes('b/c ')) inputstr = inputstr.replace('b/c ', 'b--c '); }*/ - if (inputstr.includes('"')) return { ignored: inputstr }; // Optimade guard - - const tokens = inputstr - .replace(new RegExp('\\+|\\!|\\?', 'g'), '') - .replace(new RegExp(',|/', 'g'), ' ') - .replace(new RegExp('<', 'g'), ' < ') - .replace(new RegExp('>', 'g'), ' > ') - .replace(new RegExp('=', 'g'), ' = ') - .split(/\s+/); - - const result = {}; - let n_terms = 0; - let n_toks = 1; - let queue = []; - const ignored = []; - //console.log(tokens); - // TODO: gracefully discard brackets - - tokens.forEach(function (input) { - let facet = false, - simple = false; - input = input.trim(); + if (inputstr.includes('\"')) return {'ignored': inputstr}; // Optimade guard + + const tokens = inputstr + .replace(new RegExp('\\+|\\!|\\?', 'g'), '') + .replace(new RegExp(',|/', 'g'), ' ') + .replace(new RegExp('<', 'g'), ' < ') + .replace(new RegExp('>', 'g'), ' > ') + .replace(new RegExp('=', 'g'), ' = ') + .split(/\s+/); + + const result = {}; + let n_terms = 0; + let n_toks = 1; + let queue = []; + const ignored = []; + //console.log(tokens); + // TODO: gracefully discard brackets + + tokens.forEach(function (input) { + let facet = false, + simple = false; + input = input.trim(); const linput = input.toLowerCase(); // Allow 'period' and 'group' to be queued so they can combine with a following number @@ -1875,13 +1812,13 @@ const OptimadeNLP = function () { return; } - if ( - input.includes('<') || - input.includes('>') || - input.includes('=') || - is_numeric(input) - ) { - // numeric searches + if ( + input.includes('<') || + input.includes('>') || + input.includes('=') || + is_numeric(input) + ) { + // numeric searches // If this numeric token is actually the second part of 'period N' or 'group N', // don't treat it as a numeric filter but allow multiword facet combining. if ( @@ -1892,394 +1829,366 @@ const OptimadeNLP = function () { ) { // fall-through to normal multiword processing } else if ( - input.indexOf('<') === 0 || - input.indexOf('>') === 0 || - input.indexOf('=') === 0 - ) { - // separated op sign - if (result.props) { - if (!result.numeric) result.numeric = []; - result.numeric.push([result.props, input.substr(0, 1)]); - delete result.props; - } else if ( - result.numeric && - result.numeric[result.numeric.length - 1] - ) { - result.numeric[result.numeric.length - 1][1] = input.substr(0, 1); // TODO account <=, =<, >=, => - } else ignored.push(input); - } else if (is_numeric(input)) { - if (result.props) { - if (!result.numeric) result.numeric = []; - result.numeric.push([result.props, '=', parseFloat(input)]); - delete result.props; - } else if ( - result.numeric && - result.numeric[result.numeric.length - 1] - ) { - result.numeric[result.numeric.length - 1][2] = parseFloat(input); // NB no commas! - if (!result.numeric[result.numeric.length - 1][1]) - result.numeric[result.numeric.length - 1][1] = '='; - } else ignored.push(input); - } - return; - } - - if (!queue.length || (input != 'at' && input != 'in')) { - // FIXME TODO special treatment, words vs. chemical symbols - simple = try_uniword_facet(input); - } - - if (simple) { - facet = simple[0]; - if (simple[1]) input = simple[1]; - if (queue.length && !queue[queue.length - 1].ready) - ignored.push(...queue.map((obj) => obj.input)); - queue = []; - //console.log(input + ": found simple facet " + simple[0]); - } else { - if (!stop_words.includes(input)) { - input = input - .replace(new RegExp('\\(', 'g'), '') - .replace(new RegExp('\\)', 'g'), '') - .replace(new RegExp('\\[', 'g'), '') - .replace(new RegExp('\\]', 'g'), ''); - - const candidate = try_multiword_facet(input, queue); - //console.log(candidate); - - if (candidate.combined) queue = []; - - if (candidate.ready) { - // term found either alone or with previous - facet = candidate.facet; - input = candidate.input; - queue = [candidate]; - } else if (candidate.anew) { - // token anew - if (queue.length && !queue[queue.length - 1].ready) - ignored.push(...queue.map((obj) => obj.input)); - queue = [candidate]; - } else if (!candidate) { - // token unknown - if (queue.length && !queue[queue.length - 1].ready) - ignored.push(...queue.map((obj) => obj.input)); - queue = []; - if (is_like_chem_formula(input)) { - facet = 'formulae'; - } - if (!facet) { - ignored.push(input); - } - } - } - } - - if (n_toks === tokens.length) { - // token at the end, terminating - if (queue.length && !queue[queue.length - 1].ready) - ignored.push(...queue.map((obj) => obj.input)); - queue = []; - } - - if (facet) { - if (facet === 'formulae') input = termify_formulae(input); - - if (result[facet]) { - // What to do with the found term of the same category? - //console.log('Compare: '+result[facet]+' vs. '+input); - - if (facet === 'formulae') { - ignored.push(input); - } else if (facet === 'elements') { - result[facet] += `-${input}`; - } else if (facet === 'classes') { - result[facet] += `, ${input}`; //escape(input); - } else if (facet === 'props') { - if (input.includes(result[facet])) { - //console.log('Smaller match '+result[facet]+' was thrown away'); - result[facet] = input; - } else { - ignored.push(input); - } - } - } else result[facet] = input; //escape(input); - - n_terms++; - } - n_toks++; - }); - - result.ignored = ignored; - return result; - } - - /* - * Chemical formulae parsing, courtesy of Nathan Leung, MIT license - */ - function tokenizeInitial(formula, tokens = []) { - if (formula.length === 0) { - return tokens; - } - - // Tokenize parentheses - if (formula[0] === '(' || formula[0] === '[') { - return tokenizeInitial(formula.substr(1), [ - ...tokens, - { - type: 'parenthesis', - value: 'open', - }, - ]); - } - if (formula[0] === ')' || formula[0] === ']') { - return tokenizeAfterElement(formula.substr(1), [ - ...tokens, - { - type: 'parenthesis', - value: 'close', - }, - ]); - } - - // Tokenize coefficient - let num = ''; - for (let i = 0; i < formula.length; i++) { - // If the char is a number - if (!isNaN(Number(formula[i]))) { - num = num + formula[i]; - } else { - // End loop if char is not a number - break; - } - } - // If a coefficient exists - if (num !== '') { - return tokenizeInitial(formula.substr(num.length), [ - ...tokens, - { - type: 'coefficient', - value: Number(num), - }, - ]); - } - - // Tokenize element - // Check if current char is uppercase letter - const char = formula[0]; - const code = char.charCodeAt(0); - if (code >= 65 && code <= 90) { - // If next char is a lowercase letter - if (formula.length > 1) { - const nextChar = formula[1]; - const nextCode = nextChar.charCodeAt(0); - if (nextCode >= 97 && nextCode <= 122) { - return tokenizeAfterElement(formula.substr(2), [ - ...tokens, - { - type: 'element', - value: char + nextChar, - }, - ]); - } - } - return tokenizeAfterElement(formula.substr(1), [ - ...tokens, - { - type: 'element', - value: char, - }, - ]); - } - - // If it doesn't match anything - throw Error(`There was an error parsing formula. We were able to get to here:\n${JSON.stringify( - tokens, - null, - 2 - )} + input.indexOf('<') === 0 || + input.indexOf('>') === 0 || + input.indexOf('=') === 0 + ) { + // separated op sign + if (result.props) { + if (!result.numeric) result.numeric = []; + result.numeric.push([result.props, input.substr(0, 1)]); + delete result.props; + } else if (result.numeric && result.numeric[result.numeric.length - 1]) { + result.numeric[result.numeric.length - 1][1] = input.substr(0, 1); // TODO account <=, =<, >=, => + } else ignored.push(input); + } else if (is_numeric(input)) { + if (result.props) { + if (!result.numeric) result.numeric = []; + result.numeric.push([result.props, '=', parseFloat(input)]); + delete result.props; + } else if (result.numeric && result.numeric[result.numeric.length - 1]) { + result.numeric[result.numeric.length - 1][2] = parseFloat(input); // NB no commas! + if (!result.numeric[result.numeric.length - 1][1]) + result.numeric[result.numeric.length - 1][1] = '='; + } else ignored.push(input); + } + return; + } + + if (!queue.length || (input != 'at' && input != 'in')) { + // FIXME TODO special treatment, words vs. chemical symbols + simple = try_uniword_facet(input); + } + + if (simple) { + facet = simple[0]; + if (simple[1]) input = simple[1]; + if (queue.length && !queue[queue.length - 1].ready) + ignored.push(...queue.map((obj) => obj.input)); + queue = []; + //console.log(input + ": found simple facet " + simple[0]); + } else { + if (!stop_words.includes(input)) { + input = input + .replace(new RegExp('\\(', 'g'), '') + .replace(new RegExp('\\)', 'g'), '') + .replace(new RegExp('\\[', 'g'), '') + .replace(new RegExp('\\]', 'g'), ''); + + const candidate = try_multiword_facet(input, queue); + //console.log(candidate); + + if (candidate.combined) queue = []; + + if (candidate.ready) { + // term found either alone or with previous + facet = candidate.facet; + input = candidate.input; + queue = [candidate]; + } else if (candidate.anew) { + // token anew + if (queue.length && !queue[queue.length - 1].ready) + ignored.push(...queue.map((obj) => obj.input)); + queue = [candidate]; + } else if (!candidate) { + // token unknown + if (queue.length && !queue[queue.length - 1].ready) + ignored.push(...queue.map((obj) => obj.input)); + queue = []; + if (is_like_chem_formula(input)) { + facet = 'formulae'; + } + if (!facet) { + ignored.push(input); + } + } + } + } + + if (n_toks === tokens.length) { + // token at the end, terminating + if (queue.length && !queue[queue.length - 1].ready) + ignored.push(...queue.map((obj) => obj.input)); + queue = []; + } + + if (facet) { + if (facet === 'formulae') input = termify_formulae(input); + + if (result[facet]) { + // What to do with the found term of the same category? + //console.log('Compare: '+result[facet]+' vs. '+input); + + if (facet === 'formulae') { + ignored.push(input); + } else if (facet === 'elements') { + result[facet] += `-${input}`; + } else if (facet === 'classes') { + result[facet] += `, ${input}`; //escape(input); + } else if (facet === 'props') { + if (input.includes(result[facet])) { + //console.log('Smaller match '+result[facet]+' was thrown away'); + result[facet] = input; + } else { + ignored.push(input); + } + } + } else result[facet] = input; //escape(input); + + n_terms++; + } + n_toks++; + }); + + result.ignored = ignored; + return result; + } + + /* + * Chemical formulae parsing, courtesy of Nathan Leung, MIT license + */ + function tokenizeInitial(formula, tokens = []) { + if (formula.length === 0) { + return tokens; + } + + // Tokenize parentheses + if (formula[0] === '(' || formula[0] === '[') { + return tokenizeInitial(formula.substr(1), [ + ...tokens, { + type: 'parenthesis', + value: 'open', + }, + ]); + } + if (formula[0] === ')' || formula[0] === ']') { + return tokenizeAfterElement(formula.substr(1), [ + ...tokens, { + type: 'parenthesis', + value: 'close', + }, + ]); + } + + // Tokenize coefficient + let num = ''; + for (let i = 0; i < formula.length; i++) { + // If the char is a number + if (!isNaN(Number(formula[i]))) { + num = num + formula[i]; + } else { + // End loop if char is not a number + break; + } + } + // If a coefficient exists + if (num !== '') { + return tokenizeInitial(formula.substr(num.length), [ + ...tokens, { + type: 'coefficient', + value: Number(num), + }, + ]); + } + + // Tokenize element + // Check if current char is uppercase letter + const char = formula[0]; + const code = char.charCodeAt(0); + if (code >= 65 && code <= 90) { + // If next char is a lowercase letter + if (formula.length > 1) { + const nextChar = formula[1]; + const nextCode = nextChar.charCodeAt(0); + if (nextCode >= 97 && nextCode <= 122) { + return tokenizeAfterElement(formula.substr(2), [ + ...tokens, { + type: 'element', + value: char + nextChar, + }, + ]); + } + } + return tokenizeAfterElement(formula.substr(1), [ + ...tokens, { + type: 'element', + value: char, + }, + ]); + } + + // If it doesn't match anything + throw Error(`There was an error parsing formula. We were able to get to here:\n${JSON.stringify(tokens, null, 2)} \n\nRemaining formula: ${formula}`); - } - - /* - * Chemical formulae parsing, courtesy of Nathan Leung, MIT license - */ - function tokenizeAfterElement(formula, tokens) { - if (formula.length === 0) { - return tokenizeInitial(formula, tokens); - } - - // Tokenize subscript - // This is repeated from tokenizeInitial, perhaps - // take it out - let num = ''; - for (let i = 0; i < formula.length; i++) { - // If the char is a number - if (!isNaN(Number(formula[i]))) { - num = num + formula[i]; - } else { - // End loop if char is not a number - break; - } - } - // If a subscript exists - if (num !== '') { - return tokenizeInitial(formula.substr(num.length), [ - ...tokens, - { - type: 'subscript', - value: Number(num), - }, - ]); - } - - // If it's not anything, then pass back to tokenizeInitial - return tokenizeInitial(formula, tokens); - } - - /* - * Chemical formulae parsing, courtesy of Nathan Leung, MIT license - */ - function countElements(tokens, elements = {}) { - if (tokens.length === 0) { - return elements; - } - // Create keys for each element present in the tokens array - if (Object.keys(elements).length === 0) { - for (let i = 0; i < tokens.length; i++) { - if (tokens[i].type === 'element') { - if (typeof elements[tokens[i].value] === 'undefined') { - elements[tokens[i].value] = 0; - } - } - } - } - - for (let i = 0; i < tokens.length; i++) { - if (tokens[i].type === 'coefficient') { - return countInGroup(tokens.slice(1), elements, tokens[i].value); - } else if (tokens[i].type === 'element') { - return countInGroup(tokens, elements); - } else if (tokens[i].type === 'parenthesis') { - if (tokens[i].value === 'open') { - for (let j = i; j < tokens.length; j++) { - if ( - tokens[j].type === 'parenthesis' && - tokens[j].value === 'close' - ) { - // There will always be a subscript after a parenthetical group - if ( - tokens.length > j + 1 && - tokens[j].type === 'parenthesis' && - tokens[j].value === 'close' - ) { - if (tokens[j + 1].type === 'subscript') { - return countInGroup( - tokens.slice(1), - elements, - tokens[j + 1].value - ); - } - } - //console.error(JSON.stringify(tokens, null, 2)); - //console.error(JSON.stringify(elements, null, 2)) - throw Error('Parenthetical group must have subscript'); - } - } - return countInGroup(tokens.slice(1), elements); - } else { - // Skip closing parenthesis and subscript - return countInGroup(tokens.slice(2), elements); - } - } - } - - // If none of the above cases are covered then throw an error - throw Error(`There was an error:\n\n${JSON.stringify(tokens, null, 2)} + } + + /* + * Chemical formulae parsing, courtesy of Nathan Leung, MIT license + */ + function tokenizeAfterElement(formula, tokens) { + if (formula.length === 0) { + return tokenizeInitial(formula, tokens); + } + + // Tokenize subscript + // This is repeated from tokenizeInitial, perhaps + // take it out + let num = ''; + for (let i = 0; i < formula.length; i++) { + // If the char is a number + if (!isNaN(Number(formula[i]))) { + num = num + formula[i]; + } else { + // End loop if char is not a number + break; + } + } + // If a subscript exists + if (num !== '') { + return tokenizeInitial(formula.substr(num.length), [ + ...tokens, { + type: 'subscript', + value: Number(num), + }, + ]); + } + + // If it's not anything, then pass back to tokenizeInitial + return tokenizeInitial(formula, tokens); + } + + /* + * Chemical formulae parsing, courtesy of Nathan Leung, MIT license + */ + function countElements(tokens, elements = {}) { + if (tokens.length === 0) { + return elements; + } + // Create keys for each element present in the tokens array + if (Object.keys(elements).length === 0) { + for (let i = 0; i < tokens.length; i++) { + if (tokens[i].type === 'element') { + if (typeof elements[tokens[i].value] === 'undefined') { + elements[tokens[i].value] = 0; + } + } + } + } + + for (let i = 0; i < tokens.length; i++) { + if (tokens[i].type === 'coefficient') { + return countInGroup(tokens.slice(1), elements, tokens[i].value); + } else if (tokens[i].type === 'element') { + return countInGroup(tokens, elements); + } else if (tokens[i].type === 'parenthesis') { + if (tokens[i].value === 'open') { + for (let j = i; j < tokens.length; j++) { + if (tokens[j].type === 'parenthesis' && tokens[j].value === 'close') { + // There will always be a subscript after a parenthetical group + if ( + tokens.length > (j + 1) && + tokens[j].type === 'parenthesis' && + tokens[j].value === 'close' + ) { + if (tokens[j + 1].type === 'subscript') { + return countInGroup(tokens.slice(1), elements, tokens[j + 1].value); + } + } + //console.error(JSON.stringify(tokens, null, 2)); + //console.error(JSON.stringify(elements, null, 2)) + throw Error('Parenthetical group must have subscript'); + } + } + return countInGroup(tokens.slice(1), elements); + } else { + // Skip closing parenthesis and subscript + return countInGroup(tokens.slice(2), elements); + } + } + } + + // If none of the above cases are covered then throw an error + throw Error(`There was an error:\n\n${JSON.stringify(tokens, null, 2)} \n\n${JSON.stringify(elements, null, 2)}`); - } - - /* - * Chemical formulae parsing, courtesy of Nathan Leung, MIT license - */ - function countInGroup(tokens, elements, coefficient = 1) { - if (tokens.length === 0) { - return countElements(tokens, elements); - } - - if (tokens[0].type !== 'element') { - // If this happens this is likely a mistake, pass back to countElements - return countElements(tokens, elements); - } - - let i = 0; - while (i < tokens.length) { - if (tokens[i].type === 'element') { - if (tokens.length > i + 1 && tokens[i + 1].type === 'subscript') { - elements[tokens[i].value] += tokens[i + 1].value * coefficient; - i += 2; - continue; - } - elements[tokens[i].value] += coefficient; - i++; - continue; - } - break; - } - return countElements(tokens.slice(i), elements); - } - - /* - * Sorting the formulae alphabetically for Optimade - */ - function sort_formula(text) { - let parsed = false; - try { - parsed = countElements(tokenizeInitial(text)); - } catch (err) { - //console.error(err); - return text; - } - let sorted = '', - els = Object.keys(parsed); - els.sort(); - els.forEach(function (el) { - sorted += el + (parsed[el] === 1 ? '' : parsed[el]); - }); - return sorted; - } - - /* - * Convert MPDS search query object notation into the Optimade filter - */ - function to_optimade(parsed) { - const filter = []; - - ['formulae', 'elements', 'props', 'classes'].forEach(function (categ) { - if (!parsed[categ]) return; - else if (categ === 'formulae') { - if (is_formula_anonymous(parsed[categ])) - filter.push(`chemical_formula_anonymous="${parsed[categ]}"`); - else - filter.push( - `chemical_formula_reduced="${sort_formula(parsed[categ])}"` - ); - } else if (categ === 'elements') { - filter.push( - `elements HAS ALL "${parsed[categ].split('-').join('","')}"` - ); - } else if (categ === 'props') { - filter.push(`_mpds_${parsed[categ].replaceAll(' ', '_')} IS KNOWN`); - } else if (categ === 'classes') { - parsed[categ].split(', ').forEach(function (item) { - const arity = arity_keys.indexOf(item); + } + + /* + * Chemical formulae parsing, courtesy of Nathan Leung, MIT license + */ + function countInGroup(tokens, elements, coefficient = 1) { + if (tokens.length === 0) { + return countElements(tokens, elements); + } + + if (tokens[0].type !== 'element') { + // If this happens this is likely a mistake, pass back to countElements + return countElements(tokens, elements); + } + + let i = 0; + while (i < tokens.length) { + if (tokens[i].type === 'element') { + if (tokens.length > (i + 1) && tokens[i + 1].type === 'subscript') { + elements[tokens[i].value] += (tokens[i + 1].value * coefficient); + i += 2; + continue; + } + elements[tokens[i].value] += coefficient; + i++; + continue; + } + break; + } + return countElements(tokens.slice(i), elements); + } + + /* + * Sorting the formulae alphabetically for Optimade + */ + function sort_formula(text) { + let parsed = false; + try { + parsed = countElements(tokenizeInitial(text)); + } catch (err) { + //console.error(err); + return text; + } + let sorted = '', + els = Object.keys(parsed); + els.sort(); + els.forEach(function (el) { + sorted += el + (parsed[el] === 1 ? '' : parsed[el]); + }); + return sorted; + } + + /* + * Convert MPDS search query object notation into the Optimade filter + */ + function to_optimade(parsed) { + const filter = []; + + ['formulae', 'elements', 'props', 'classes'].forEach(function (categ) { + if (!parsed[categ]) return; + else if (categ === 'formulae') { + if (is_formula_anonymous(parsed[categ])) + filter.push(`chemical_formula_anonymous="${parsed[categ]}"`); + else filter.push(`chemical_formula_reduced="${sort_formula(parsed[categ])}"`); + } else if (categ === 'elements') { + filter.push(`elements HAS ALL "${parsed[categ].split('-').join('","')}"`); + } else if (categ === 'props') { + filter.push(`_mpds_${parsed[categ].replaceAll(' ', '_')} IS KNOWN`); + } else if (categ === 'classes') { + parsed[categ].split(', ').forEach(function (item) { + const arity = arity_keys.indexOf(item); if (arity > 0) { filter.push(`nelements=${arity}`); } else if (mpds_classes.includes(item)) { filter.push(`_mpds_classes HAS ALL "${item}"`); } - }); - } - }); + }); + } + }); if (parsed.numeric && Array.isArray(parsed.numeric)) { parsed.numeric.forEach(function (numfilt) { @@ -2297,38 +2206,35 @@ const OptimadeNLP = function () { }); } - return filter.join(' AND '); - } - - /* - * API - */ - return { - guess, - to_optimade, - - is_formula_anonymous, - sort_formula, - parse_aeatoms, - termify_formulae, - is_numeric, - - arity_keys, - periodic_elements, - periodic_elements_cased, - periodic_element_names, - }; + return filter.join(' AND '); + } + + /* + * API + */ + return { + guess, + to_optimade, + + is_formula_anonymous, + sort_formula, + parse_aeatoms, + termify_formulae, + is_numeric, + + arity_keys, + periodic_elements, + periodic_elements_cased, + periodic_element_names + }; }; if (typeof module !== 'undefined' && module.exports) { - module.exports = OptimadeNLP; -} else if ( - typeof require === 'function' && - typeof require.specified === 'function' -) { - define(function () { - return OptimadeNLP; - }); + module.exports = OptimadeNLP; +} else if (typeof require === 'function' && typeof require.specified === 'function') { + define(function () { + return OptimadeNLP; + }); } else if (window !== undefined) { - window.OptimadeNLP = OptimadeNLP; + window.OptimadeNLP = OptimadeNLP } diff --git a/test_nlp.json b/test_nlp.json index bd61327..b5aa45a 100755 --- a/test_nlp.json +++ b/test_nlp.json @@ -1,280 +1,217 @@ [ - ["tio2", { "formulae": "tio2" }], - ["al2o3", { "formulae": "al2o3" }], - ["band gap, chlorides", { "props": "band gap", "classes": "chloride" }], - ["ti o", { "elements": "Ti-O" }], - ["tio2, band gap", { "formulae": "tio2", "props": "band gap" }], - ["organic pb i", { "classes": "organic", "elements": "Pb-I" }], - [ - "organic pb-i perovskite", - { "elements": "Pb-I", "classes": "organic, perovskite" } - ], - ["ternary oxide", { "classes": "ternary, oxide" }], - ["zeolite ite", { "classes": "zeolite" }], - ["CuLaMnSeO", { "formulae": "CuLaMnSeO" }], - ["AgGaTeSe", { "formulae": "AgGaTeSe" }], - ["MnOCaSeRu", { "formulae": "MnOCaSeRu" }], - ["LaZnOAsAm", { "formulae": "LaZnOAsAm" }], - ["MnCaOSeRu", { "formulae": "MnCaOSeRu" }], - ["MnCaSeBRu", { "formulae": "MnCaSeBRu" }], - ["MnCaSeRu", { "formulae": "MnCaSeRu" }], - ["BCNOAc", { "formulae": "BCNOAc" }], - ["LaZnOAs", { "formulae": "LaZnOAs" }], - ["LaZnAsO", { "formulae": "LaZnAsO" }], - ["LaOZnAs", { "formulae": "LaOZnAs" }], - ["OLaZnAs", { "formulae": "OLaZnAs" }], - ["OLaZnY", { "formulae": "OLaZnY" }], - ["OLaZnYB", { "formulae": "OLaZnYB" }], - ["OLaZnYBr", { "formulae": "OLaZnYBr" }], - ["OLaSZnAs", { "formulae": "OLaSZnAs" }], - ["OLaOZnAs", { "formulae": "OLaOZnAs" }], - ["OLaZnPAs", { "formulae": "OLaZnPAs" }], - ["OLaKZnPAs", { "formulae": "OLaKZnPAs" }], - ["SYPMnBr", { "formulae": "SYPMnBr" }], - ["SYMnPBr", { "formulae": "SYMnPBr" }], - ["RbMnTaO", { "formulae": "RbMnTaO" }], - ["RoMnTaO", {}], - ["CPZnOY", { "formulae": "CPZnOY" }], - ["CPOZnY", { "formulae": "CPOZnY" }], - ["BaLaMnRuO6", { "formulae": "BaLaMnRuO6" }], - ["phase diagrams", { "props": "phase diagram" }], - [ - "thermodynamics superconductor fe", - { "props": "thermodynamics", "classes": "superconductor", "elements": "Fe" } - ], - ["cub", { "lattices": "cubic" }], - [ - "tetragonal srtio3 ternary", - { "lattices": "tetragonal", "formulae": "srtio3", "classes": "ternary" } - ], - [ - "SrTiO₃ tet phonons", - { "lattices": "tetragonal", "formulae": "SrTiO3", "props": "phonons" } - ], - ["O3Al2 elastic properties", { "formulae": "O3Al2" }], - [ - "optical properties, LiKSO4", - { "props": "optical properties", "formulae": "LiKSO4" } - ], - [ - "band gap ZnO hex", - { "props": "band gap", "lattices": "hexagonal", "formulae": "ZnO" } - ], - ["geo", {}], - ["GeO", { "formulae": "GeO" }], - ["beo", {}], - ["BeO", { "formulae": "BeO" }], - ["GdNiIn conductivity", { "formulae": "GdNiIn", "props": "conductivity" }], - [ - "indium binary oxide metal", - { "elements": "In", "classes": "binary, oxide, metal" } - ], - [ - "Pd hydride lattice", - { "elements": "Pd", "classes": "hydride", "props": "lattice" } - ], - ["Mn halogens Be", { "elements": "Mn-Be", "classes": "halogen" }], - [ - "carbide semiconductor electronic properties", - { "classes": "carbide, semiconductor", "props": "electronic properties" } - ], - [ - "hexagonal carbide semiconductor electronic properties", - { - "lattices": "hexagonal", - "classes": "carbide, semiconductor", - "props": "electronic properties" - } - ], - [ - "rare earth chalcogens organics magnetism", - { "classes": "rare earth, chalcogen, organic", "props": "magnetism" } - ], - [ - "Te metals transitional Sn", - { "elements": "Te-Sn", "classes": "metal, transitional" } - ], - [ - "radioactive nonmetals conductivity", - { "classes": "radioactive, nonmetal", "props": "conductivity" } - ], - [ - "noble gases superconductivity", - { "classes": "noble gas", "props": "superconductivity" } - ], - [ - "W-Mo ternary, phase diagram", - { "elements": "W-Mo", "classes": "ternary", "props": "phase diagram" } - ], - [ - "quaternary perovskites, thermodynamics", - { "classes": "quaternary, perovskite", "props": "thermodynamics" } - ], - ["lanthanoid element", { "classes": "lanthanoid, unary" }], - [ - "lanthanoid transitional electronic properties", - { "classes": "lanthanoid, transitional", "props": "electronic properties" } - ], - [ - "lattice of intermetallic binary cubic", - { - "props": "lattice", - "classes": "intermetallic, binary", - "lattices": "cubic" - } - ], - ["oxygen", { "elements": "O" }], - [ - "Y oxide lattice ternary", - { "elements": "Y", "classes": "oxide, ternary", "props": "lattice" } - ], - [ - "phases for ternary ferromagnet", - { "props": "phases", "classes": "ternary, ferromagnet" } - ], - [ - "superconductivity superconductors C", - { - "props": "superconductivity", - "classes": "superconductor", - "elements": "C" - } - ], - [ - "magnetism, ferromagnet, chalcogen, monocl", - { - "props": "magnetism", - "classes": "ferromagnet, chalcogen", - "lattices": "monoclinic" - } - ], - ["Rg", { "elements": "Rg" }], - ["ErSi", { "formulae": "ErSi" }], - ["ErSI", { "formulae": "ErSI" }], - ["Ca3(PO4)2", { "formulae": "Ca3(PO4)2" }], - ["Ca3[PO4]2", { "formulae": "Ca3[PO4]2" }], - ["conductors", { "classes": "conductor" }], - ["chromium, fe, tricl", { "elements": "Cr-Fe", "lattices": "triclinic" }], - ["astatine, cell", { "elements": "At" }], - ["Gruneisen coefficient", { "props": "gruneisen coefficients" }], - ["Seebeck coefficient", { "props": "seebeck coefficient" }], - ["figure of merit", { "props": "figure of merit" }], - ["conductivity", { "props": "conductivity" }], - [ - "crystalline structure of binaries", - { "props": "crystalline structure", "classes": "binary" } - ], - [ - "crystal cell for binary compound", - { "props": "crystal cell", "classes": "binary" } - ], - ["valence", { "props": "valence" }], - ["conductivity", { "props": "conductivity" }], - [ - "La magnetic phase diagram", - { "elements": "La", "props": "magnetic phase diagram" } - ], - ["info on phase diagrams plots", { "props": "phase diagram plots" }], - ["optical conductivity", { "props": "optical conductivity" }], - ["cell and atoms", { "classes": "cell and atoms" }], - ["conductor superconductor", { "classes": "conductor, superconductor" }], - ["metal nonmetal", { "classes": "metal, nonmetal" }], - ["metal or nonmetal character", { "props": "metal or nonmetal character" }], - ["electrical resistance values", { "props": "electrical properties" }], - ["temperature derivative by pressure", { "props": "pressure" }], - ["temperature derivative by composition", {}], - ["optical phonon frequency", { "props": "optical properties" }], - ["enthalpy conductivity", { "props": "values of enthalpy" }], - ["decomposis of the magneto", {}], - ["TiO2 SrTiO3", { "formulae": "TiO2" }], - ["average number of 4f electrons", {}], - ["225aaa!", {}], - ["he-he-he", { "elements": "He-He-He" }], - [ - "electronic contribution to thermal conductivity", - { "props": "electronic contribution to thermal conductivity" } - ], - [ - "electronic contribution to superconducting heat capacity", - { "props": "electronic contribution to superconducting heat capacity" } - ], - ["residual resistivity", { "props": "residual resistivity ratio" }], - [ - "optical absorption no", - { "props": "optical absorption coefficient", "elements": "No" } - ], - ["energy of optical phonon", { "props": "energy of optical phonon" }], - ["luminescence", { "props": "values of luminescence" }], - ["values of luminescence", { "props": "values of luminescence" }], - ["magnetization", { "props": "magnetization" }], - ["values of magnetization", { "props": "values of magnetization" }], - [ - "energy of longitudinal optical phonon", - { "props": "energy of longitudinal optical phonon" } - ], - [ - "permittivity (dielectric constant) of perovskites ", - { "props": "permittivity", "classes": "perovskite" } - ], - ["density > 1", { "numeric": [["density", ">", 1]], "phased": true }], - ["InFO", { "formulae": "InFO" }], - ["info", {}], - ["Goodbye.", {}], - ["iiii", {}], - ["...", {}], - ["What'd I do?", { "elements": "I" }], - ["OK, let's go", { "formulae": "OK" }], - ["Ginny Danburry's here. Look for her", {}], - ["I think u make me laughing.", { "elements": "I-U" }], - ["physical properties", { "props": "physical properties" }], - ["physical property", { "props": "physical properties" }], - ["electric property", { "props": "electric properties" }], - ["ab initio", {}], - ["ab initio calculations,", { "classes": "ab initio calculations" }], - ["ab initio literature,", { "classes": "ab initio literature" }], - [ - "ab initio literature, ab initio calculations,", - { "classes": "ab initio literature, ab initio calculations" } - ], - [ - "ab initio calculations P K", - { "elements": "P-K", "classes": "ab initio calculations" } - ], - [ - "ab initio literature P K", - { "elements": "P-K", "classes": "ab initio literature" } - ], - [ - "ab initio, ab initio calculations, P K", - { "elements": "P-K", "classes": "ab initio calculations" } - ], - [ - "ab initio literature, ab initio calculations, P K", - { - "elements": "P-K", - "classes": "ab initio literature, ab initio calculations" - } - ], - ["A1B2C3", { "formulae": "A1B2C3" }], - ["AB", { "formulae": "AB" }], - ["ab datum", {}], - [ - "AB ab initio calculations", - { "formulae": "AB", "classes": "ab initio calculations" } - ], - ["ABC7D9", { "formulae": "ABC7D9" }], - ["ABCD", { "formulae": "ABCD" }], - ["ABCDE", {}], - ["filter=author=\"Sąžininga Žąsis\"", {}], - [ - "(elements HAS \"Ac\" AND nelements=1) OR (elements HAS \"Ac\" AND nsites=1)", - {} - ], - ["nelements=1", {}], - ["elements HAS \"Ti\"", {}], - ["elements HAS ALL \"C\",\"N\",\"O\",\"H\"", {}], - ["elements HAS \"Ti\" AND nelements>3", {}], - ["chemical_formula_reduced=\"Li7Sn2\"", {}], + ["tio2", { "formulae": "tio2" }], + ["al2o3", { "formulae": "al2o3" }], + ["band gap, chlorides", { "props": "band gap", "classes": "chloride" }], + ["ti o", { "elements": "Ti-O" }], + ["tio2, band gap", { "formulae": "tio2", "props": "band gap" }], + ["organic pb i", { "classes": "organic", "elements": "Pb-I" }], + ["organic pb-i perovskite", { "elements": "Pb-I", "classes": "organic, perovskite" }], + ["ternary oxide", { "classes": "ternary, oxide" }], + ["zeolite ite", { "classes": "zeolite" }], + ["CuLaMnSeO", { "formulae": "CuLaMnSeO" }], + ["AgGaTeSe", { "formulae": "AgGaTeSe" }], + ["MnOCaSeRu", { "formulae": "MnOCaSeRu" }], + ["LaZnOAsAm", { "formulae": "LaZnOAsAm" }], + ["MnCaOSeRu", { "formulae": "MnCaOSeRu" }], + ["MnCaSeBRu", { "formulae": "MnCaSeBRu" }], + ["MnCaSeRu", { "formulae": "MnCaSeRu" }], + ["BCNOAc", { "formulae": "BCNOAc" }], + ["LaZnOAs", { "formulae": "LaZnOAs" }], + ["LaZnAsO", { "formulae": "LaZnAsO" }], + ["LaOZnAs", { "formulae": "LaOZnAs" }], + ["OLaZnAs", { "formulae": "OLaZnAs" }], + ["OLaZnY", { "formulae": "OLaZnY" }], + ["OLaZnYB", { "formulae": "OLaZnYB" }], + ["OLaZnYBr", { "formulae": "OLaZnYBr" }], + ["OLaSZnAs", { "formulae": "OLaSZnAs" }], + ["OLaOZnAs", { "formulae": "OLaOZnAs" }], + ["OLaZnPAs", { "formulae": "OLaZnPAs" }], + ["OLaKZnPAs", { "formulae": "OLaKZnPAs" }], + ["SYPMnBr", { "formulae": "SYPMnBr" }], + ["SYMnPBr", { "formulae": "SYMnPBr" }], + ["RbMnTaO", { "formulae": "RbMnTaO" }], + ["RoMnTaO", {}], + ["CPZnOY", { "formulae": "CPZnOY" }], + ["CPOZnY", { "formulae": "CPOZnY" }], + ["BaLaMnRuO6", { "formulae": "BaLaMnRuO6" }], + ["phase diagrams", { "props": "phase diagram" }], + [ + "thermodynamics superconductor fe", + { "props": "thermodynamics", "classes": "superconductor", "elements": "Fe" } + ], + ["cub", { "lattices": "cubic" }], + [ + "tetragonal srtio3 ternary", + { "lattices": "tetragonal", "formulae": "srtio3", "classes": "ternary" } + ], + [ + "SrTiO₃ tet phonons", + { "lattices": "tetragonal", "formulae": "SrTiO3", "props": "phonons" } + ], + ["O3Al2 elastic properties", { "formulae": "O3Al2" }], + ["optical properties, LiKSO4", { "props": "optical properties", "formulae": "LiKSO4" }], + ["band gap ZnO hex", { "props": "band gap", "lattices": "hexagonal", "formulae": "ZnO" }], + ["geo", {}], + ["GeO", { "formulae": "GeO" }], + ["beo", {}], + ["BeO", { "formulae": "BeO" }], + ["GdNiIn conductivity", { "formulae": "GdNiIn", "props": "conductivity" }], + ["indium binary oxide metal", { "elements": "In", "classes": "binary, oxide, metal" }], + ["Pd hydride lattice", { "elements": "Pd", "classes": "hydride", "props": "lattice" }], + ["Mn halogens Be", { "elements": "Mn-Be", "classes": "halogen" }], + [ + "carbide semiconductor electronic properties", + { "classes": "carbide, semiconductor", "props": "electronic properties" } + ], + [ + "hexagonal carbide semiconductor electronic properties", + { + "lattices": "hexagonal", + "classes": "carbide, semiconductor", + "props": "electronic properties" + } + ], + [ + "rare earth chalcogens organics magnetism", + { "classes": "rare earth, chalcogen, organic", "props": "magnetism" } + ], + ["Te metals transitional Sn", { "elements": "Te-Sn", "classes": "metal, transitional" }], + [ + "radioactive nonmetals conductivity", + { "classes": "radioactive, nonmetal", "props": "conductivity" } + ], + ["noble gases superconductivity", { "classes": "noble gas", "props": "superconductivity" }], + [ + "W-Mo ternary, phase diagram", + { "elements": "W-Mo", "classes": "ternary", "props": "phase diagram" } + ], + [ + "quaternary perovskites, thermodynamics", + { "classes": "quaternary, perovskite", "props": "thermodynamics" } + ], + ["lanthanoid element", { "classes": "lanthanoid, unary" }], + [ + "lanthanoid transitional electronic properties", + { "classes": "lanthanoid, transitional", "props": "electronic properties" } + ], + [ + "lattice of intermetallic binary cubic", + { "props": "lattice", "classes": "intermetallic, binary", "lattices": "cubic" } + ], + ["oxygen", { "elements": "O" }], + [ + "Y oxide lattice ternary", + { "elements": "Y", "classes": "oxide, ternary", "props": "lattice" } + ], + ["phases for ternary ferromagnet", { "props": "phases", "classes": "ternary, ferromagnet" }], + [ + "superconductivity superconductors C", + { "props": "superconductivity", "classes": "superconductor", "elements": "C" } + ], + [ + "magnetism, ferromagnet, chalcogen, monocl", + { "props": "magnetism", "classes": "ferromagnet, chalcogen", "lattices": "monoclinic" } + ], + ["Rg", { "elements": "Rg" }], + ["ErSi", { "formulae": "ErSi" }], + ["ErSI", { "formulae": "ErSI" }], + ["Ca3(PO4)2", { "formulae": "Ca3(PO4)2" }], + ["Ca3[PO4]2", { "formulae": "Ca3[PO4]2" }], + ["conductors", { "classes": "conductor" }], + ["chromium, fe, tricl", { "elements": "Cr-Fe", "lattices": "triclinic" }], + ["astatine, cell", { "elements": "At" }], + ["Gruneisen coefficient", { "props": "gruneisen coefficients" }], + ["Seebeck coefficient", { "props": "seebeck coefficient" }], + ["figure of merit", { "props": "figure of merit" }], + ["conductivity", { "props": "conductivity" }], + [ + "crystalline structure of binaries", + { "props": "crystalline structure", "classes": "binary" } + ], + ["crystal cell for binary compound", { "props": "crystal cell", "classes": "binary" }], + ["valence", { "props": "valence" }], + ["conductivity", { "props": "conductivity" }], + ["La magnetic phase diagram", { "elements": "La", "props": "magnetic phase diagram" }], + ["info on phase diagrams plots", { "props": "phase diagram plots" }], + ["optical conductivity", { "props": "optical conductivity" }], + ["cell and atoms", { "classes": "cell and atoms" }], + ["conductor superconductor", { "classes": "conductor, superconductor" }], + ["metal nonmetal", { "classes": "metal, nonmetal" }], + ["metal or nonmetal character", { "props": "metal or nonmetal character" }], + ["electrical resistance values", { "props": "electrical properties" }], + ["temperature derivative by pressure", { "props": "pressure" }], + ["temperature derivative by composition", {}], + ["optical phonon frequency", { "props": "optical properties" }], + ["enthalpy conductivity", { "props": "values of enthalpy" }], + ["decomposis of the magneto", {}], + ["TiO2 SrTiO3", { "formulae": "TiO2" }], + ["average number of 4f electrons", {}], + ["225aaa!", {}], + ["he-he-he", { "elements": "He-He-He" }], + [ + "electronic contribution to thermal conductivity", + { "props": "electronic contribution to thermal conductivity" } + ], + [ + "electronic contribution to superconducting heat capacity", + { "props": "electronic contribution to superconducting heat capacity" } + ], + ["residual resistivity", { "props": "residual resistivity ratio" }], + ["optical absorption no", { "props": "optical absorption coefficient", "elements": "No" }], + ["energy of optical phonon", { "props": "energy of optical phonon" }], + ["luminescence", { "props": "values of luminescence" }], + ["values of luminescence", { "props": "values of luminescence" }], + ["magnetization", { "props": "magnetization" }], + ["values of magnetization", { "props": "values of magnetization" }], + ["energy of longitudinal optical phonon", { "props": "energy of longitudinal optical phonon" }], + [ + "permittivity (dielectric constant) of perovskites ", + { "props": "permittivity", "classes": "perovskite" } + ], + ["density > 1", { "numeric": [["density", ">", 1]], "phased": true }], + ["InFO", { "formulae": "InFO" }], + ["info", {}], + ["Goodbye.", {}], + ["iiii", {}], + ["...", {}], + ["What'd I do?", { "elements": "I" }], + ["OK, let's go", { "formulae": "OK" }], + ["Ginny Danburry's here. Look for her", {}], + ["I think u make me laughing.", { "elements": "I-U" }], + ["physical properties", { "props": "physical properties" }], + ["physical property", { "props": "physical properties" }], + ["electric property", { "props": "electric properties" }], + ["ab initio", {}], + ["ab initio calculations,", { "classes": "ab initio calculations" }], + ["ab initio literature,", { "classes": "ab initio literature" }], + [ + "ab initio literature, ab initio calculations,", + { "classes": "ab initio literature, ab initio calculations" } + ], + ["ab initio calculations P K", { "elements": "P-K", "classes": "ab initio calculations" }], + ["ab initio literature P K", { "elements": "P-K", "classes": "ab initio literature" }], + [ + "ab initio, ab initio calculations, P K", + { "elements": "P-K", "classes": "ab initio calculations" } + ], + [ + "ab initio literature, ab initio calculations, P K", + { "elements": "P-K", "classes": "ab initio literature, ab initio calculations" } + ], + ["A1B2C3", { "formulae": "A1B2C3" }], + ["AB", { "formulae": "AB" }], + ["ab datum", {}], + ["AB ab initio calculations", { "formulae": "AB", "classes": "ab initio calculations" }], + ["ABC7D9", { "formulae": "ABC7D9" }], + ["ABCD", { "formulae": "ABCD" }], + ["ABCDE", {}], + ["filter=author=\"Sąžininga Žąsis\"", {}], + ["(elements HAS \"Ac\" AND nelements=1) OR (elements HAS \"Ac\" AND nsites=1)", {}], + ["nelements=1", {}], + ["elements HAS \"Ti\"", {}], + ["elements HAS ALL \"C\",\"N\",\"O\",\"H\"", {}], + ["elements HAS \"Ti\" AND nelements>3", {}], + ["chemical_formula_reduced=\"Li7Sn2\"", {}], ["chemical_formula_anonymous=\"ABC\"", {}], ["elements HAS \"C\" AND elements HAS \"N\" AND elements HAS \"O\"", {}], ["elements HAS ANY \"C\",\"N\",\"O\"", {}],