Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
190 changes: 188 additions & 2 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -901,6 +901,140 @@ const OptimadeNLP = function () {
'zircosulfate',
];

/**
 * Lookup table from a lower-case element-family name to the element symbols
 * that the name expands to. Keys are periods ('period N'), numbered groups
 * ('group N') and trivial family names, some in both singular and plural form.
 *
 * Layout conventions visible in the data:
 *  - La–Yb and Ac–No live only under `lanthanide` / `actinide`; they are not
 *    repeated in 'period 6' / 'period 7'.
 *  - Lu and Lr are listed under 'group 3'.
 *  - Only 'group 3' … 'group 12' have numbered keys; the remaining groups are
 *    reachable through their family names (alkali, alkaline, triel, tetrel,
 *    pnictogen, chalcogen, halogen, noble gas).
 *  - `alkali` holds the alkali metals only (Li–Fr); hydrogen is not one of them.
 *
 * NOTE(review): 'period 7' stops at Lr, while 'group 11' contains Rg and the
 * tetrels contain Fl (and the triels do not contain Nh). Confirm which
 * synthetic elements are meant to be searchable and align the lists.
 */
const ELEMENT_GROUPS_MAP = {
    // --- periods (f-block members excluded, see above) ---
    'period 1': ['H', 'He'],
    'period 2': ['Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne'],
    'period 3': ['Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar'],
    'period 4': [
        'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co',
        'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr',
    ],
    'period 5': [
        'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh',
        'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe',
    ],
    'period 6': [
        'Cs', 'Ba', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir',
        'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn',
    ],
    'period 7': ['Fr', 'Ra', 'Lr'],

    // --- f-block series ---
    lanthanide: [
        'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu',
        'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb',
    ],
    actinide: [
        'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am',
        'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No',
    ],

    // --- s-block families ---
    // Hydrogen sits in group 1 but is not an alkali metal, so it is not listed.
    alkali: ['Li', 'Na', 'K', 'Rb', 'Cs', 'Fr'],
    alkaline: ['Be', 'Mg', 'Ca', 'Sr', 'Ba', 'Ra'],

    // --- d-block groups ---
    'group 3': ['Sc', 'Y', 'Lu', 'Lr'],
    'group 4': ['Ti', 'Zr', 'Hf'],
    'group 5': ['V', 'Nb', 'Ta'],
    'group 6': ['Cr', 'Mo', 'W'],
    'group 7': ['Mn', 'Tc', 'Re'],
    'group 8': ['Fe', 'Ru', 'Os'],
    'group 9': ['Co', 'Rh', 'Ir'],
    'group 10': ['Ni', 'Pd', 'Pt'],
    'group 11': ['Cu', 'Ag', 'Au', 'Rg'],
    'group 12': ['Zn', 'Cd', 'Hg'],

    // --- p-block families ---
    // Both plural and singular spellings are stored for triels / tetrels.
    triels: ['B', 'Al', 'Ga', 'In', 'Tl'],
    triel: ['B', 'Al', 'Ga', 'In', 'Tl'],
    tetrels: ['C', 'Si', 'Ge', 'Sn', 'Pb', 'Fl'],
    tetrel: ['C', 'Si', 'Ge', 'Sn', 'Pb', 'Fl'],
    pnictogen: ['N', 'P', 'As', 'Sb', 'Bi'],
    chalcogen: ['O', 'S', 'Se', 'Te', 'Po'],
    halogen: ['F', 'Cl', 'Br', 'I', 'At'],
    'noble gas': ['He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn'],
};

/**
 * Resolve an element-family name to the element symbols stored for it in
 * ELEMENT_GROUPS_MAP.
 *
 * The name is trimmed and lower-cased, then looked up verbatim; if that
 * misses, it is looked up again with one trailing "s" removed so that simple
 * plurals ("chalcogens") reach the singular key ("chalcogen").
 *
 * @param {string} name - family name, e.g. "Chalcogens" or "period 2"
 * @returns {string[]|null} the array held in ELEMENT_GROUPS_MAP (not a copy),
 *   or null when the name is empty, not a string, or not a known family
 */
function getGroupElements(name) {
    // Anything that is not a non-empty string cannot name a family; bail out
    // instead of throwing on a missing .toLowerCase().
    if (typeof name !== 'string' || !name) return null;

    const key = name.toLowerCase().trim();
    // Exact spelling first, then the simple-plural fallback.
    const candidates = [key, key.replace(/s$/, '')];

    // Only own keys count: a plain `map[key]` would also hit inherited
    // Object.prototype members ("constructor", "__proto__"), hand a non-array
    // back to callers and make their `.join('-')` throw.
    const hasOwn = Object.prototype.hasOwnProperty;
    for (const candidate of candidates) {
        if (hasOwn.call(ELEMENT_GROUPS_MAP, candidate)) {
            return ELEMENT_GROUPS_MAP[candidate];
        }
    }
    return null;
}

const mpds_props = [
'acceptor concentration',
'acceptor to donor concentration',
Expand Down Expand Up @@ -1423,6 +1557,10 @@ const OptimadeNLP = function () {
return ['classes', 'lanthanoid'];
else if (term.endsWith('ite') && term.length > 4) return ['classes'];

// direct element-group single-word matches (tetrels, triels, chalcogen, etc.)
const groupEls = getGroupElements(term);
if (groupEls) return ['elements', groupEls.join('-')];

const chk = term
.replace(' structure', '')
.replace(' lattice', '')
Expand Down Expand Up @@ -1454,6 +1592,16 @@ const OptimadeNLP = function () {
}
//console.log("CHECKING TERM FOR MULTI-FACET: "+term);

// Special handling: phrases like 'period 2' or 'group 11'
const pgMatch = term.match(/^(period|group)\s+(\d{1,2})$/);
if (pgMatch) {
const kind = pgMatch[1];
const num = pgMatch[2];
const key = `${kind} ${num}`;
const els = getGroupElements(key);
if (els) return { facet: 'elements', input: els.join('-'), ready: 1 };
}

candidate = check_category(term, 'classes');
if (candidate) {
if (combined) candidate.combined = true;
Expand Down Expand Up @@ -1654,6 +1802,15 @@ const OptimadeNLP = function () {
let facet = false,
simple = false;
input = input.trim();
const linput = input.toLowerCase();

// Allow 'period' and 'group' to be queued so they can combine with a following number
if (linput === 'period' || linput === 'group') {
// push a not-ready candidate to queue and continue
queue.push({ input: linput, ready: 0 });
n_toks++;
return;
}

if (
input.includes('<') ||
Expand All @@ -1662,7 +1819,16 @@ const OptimadeNLP = function () {
is_numeric(input)
) {
// numeric searches
if (
// If this numeric token is actually the second part of 'period N' or 'group N',
// don't treat it as a numeric filter but allow multiword facet combining.
if (
is_numeric(input) &&
queue.length &&
(queue[queue.length - 1].input === 'period' ||
queue[queue.length - 1].input === 'group')
) {
// fall-through to normal multiword processing
} else if (
input.indexOf('<') === 0 ||
input.indexOf('>') === 0 ||
input.indexOf('=') === 0
Expand Down Expand Up @@ -2015,11 +2181,31 @@ const OptimadeNLP = function () {
} else if (categ === 'classes') {
parsed[categ].split(', ').forEach(function (item) {
const arity = arity_keys.indexOf(item);
if (arity > 0) filter.push(`nelements=${arity}`);
if (arity > 0) {
filter.push(`nelements=${arity}`);
} else if (mpds_classes.includes(item)) {
filter.push(`_mpds_classes HAS ALL "${item}"`);
}
});
}
});

if (parsed.numeric && Array.isArray(parsed.numeric)) {
parsed.numeric.forEach(function (numfilt) {
// numfilt: [property, operator, value]
if (
numfilt.length === 3 &&
numfilt[0] &&
numfilt[1] &&
typeof numfilt[2] !== 'undefined'
) {
// Remove _mpds_ prefix if present
let prop = numfilt[0].replace(/^_mpds_/, '').replace(/ /g, '_');
filter.push(`${prop}${numfilt[1]}${numfilt[2]}`);
}
});
}

return filter.join(' AND ');
}

Expand Down
53 changes: 52 additions & 1 deletion test_nlp.json
Original file line number Diff line number Diff line change
Expand Up @@ -212,5 +212,56 @@
["elements HAS ALL \"C\",\"N\",\"O\",\"H\"", {}],
["elements HAS \"Ti\" AND nelements>3", {}],
["chemical_formula_reduced=\"Li7Sn2\"", {}],
["chemical_formula_anonymous=\"ABC\"", {}]
["chemical_formula_anonymous=\"ABC\"", {}],
["elements HAS \"C\" AND elements HAS \"N\" AND elements HAS \"O\"", {}],
["elements HAS ANY \"C\",\"N\",\"O\"", {}],
["elements HAS \"C\" OR elements HAS \"N\" OR elements HAS \"O\"", {}],
["elements HAS \"C\" AND (elements HAS \"N\" OR elements HAS \"O\")", {}],
["elements HAS ALL \"C\",\"N\",\"O\" AND nelements=3", {}],
["elements HAS ALL \"C\",\"N\",\"O\" AND nelements=4", {}],
["elements HAS ANY \"C\",\"N\",\"O\" AND nelements=3", {}],
["elements HAS ANY \"C\",\"N\",\"O\" AND nelements=4", {}],
["nelements>3 AND elements HAS ANY \"C\",\"N\",\"O\"", {}],
["nelements>4 AND elements HAS ANY \"C\",\"N\",\"O\"", {}],
["nelements>3 AND elements HAS ALL \"C\",\"N\",\"O\"", {}],
["nelements>4 AND elements HAS ALL \"C\",\"N\",\"O\"", {}],
["band gap>1.5", { "numeric": [["band gap", ">", 1.5]], "phased": true }],
["band gap<1.5", { "numeric": [["band gap", "<", 1.5]], "phased": true }],
["band gap>=1.5", { "numeric": [["band gap", ">=", 1.5]], "phased": true }],
["band gap<=1.5", { "numeric": [["band gap", "<=", 1.5]], "phased": true }],
["band gap=1.5", { "numeric": [["band gap", "=", 1.5]], "phased": true }],
["band gap!=1.5", { "numeric": [["band gap", "!=", 1.5]], "phased": true }],
[
"band gap>1.5 AND band gap<2.0",
{
"numeric": [
["band gap", ">", 1.5],
["band gap", "<", 2.0]
],
"phased": true
}
],
["chalcogens", { "elements": "O-S-Se-Te-Po" }],
["period 2", { "elements": "Li-Be-B-C-N-O-F-Ne" }],
["group 11", { "elements": "Cu-Ag-Au-Rg" }],
["tetrels", { "elements": "C-Si-Ge-Sn-Pb-Fl" }],
["tetrel", { "elements": "C-Si-Ge-Sn-Pb-Fl" }],
["all tetrels", { "elements": "C-Si-Ge-Sn-Pb-Fl" }],

["all chalcogens", { "elements": "O-S-Se-Te-Po" }],
["Chalcogens", { "elements": "O-S-Se-Te-Po" }],
["chalcogen", { "elements": "O-S-Se-Te-Po" }],
["CHALCOGENS", { "elements": "O-S-Se-Te-Po" }],

["period 2 elements", { "elements": "Li-Be-B-C-N-O-F-Ne" }],
["Period 2", { "elements": "Li-Be-B-C-N-O-F-Ne" }],
["PERIOD 2", { "elements": "Li-Be-B-C-N-O-F-Ne" }],

["group 11 elements", { "elements": "Cu-Ag-Au-Rg" }],
["Group 11", { "elements": "Cu-Ag-Au-Rg" }],
["ALL GROUP 11", { "elements": "Cu-Ag-Au-Rg" }],

["tetrel", { "elements": "C-Si-Ge-Sn-Pb-Fl" }],
["Tetrels", { "elements": "C-Si-Ge-Sn-Pb-Fl" }],
["ALL tetrels", { "elements": "C-Si-Ge-Sn-Pb-Fl" }]
]