Commit de57d11c authored by Markus Scheidgen's avatar Markus Scheidgen
Browse files

Fixed issues with anonymous and hill formula. #450

parent 1f725a7f
......@@ -22,7 +22,9 @@ from optimade.models import StructureResource
from nomad import config, datamodel, files, search, utils
from nomad.normalizing.optimade import optimade_chemical_formula_reduced
from nomad.normalizing.optimade import (
optimade_chemical_formula_reduced, optimade_chemical_formula_anonymous,
optimade_chemical_formula_hill)
from .filterparser import _get_transformer as get_transformer
......@@ -113,14 +115,12 @@ class ElasticsearchStructureCollection(EntryCollection):
if nresults_now > 1:
raise HTTPException(
status_code=404,
detail=f"Instead of a single entry, {nresults_now} entries were found",
)
detail=f'Instead of a single entry, {nresults_now} entries were found')
results = results[0] if results else None
return results, data_returned, more_data_available, all_fields - fields
def _check_aliases(self, aliases):
""" Check that aliases do not clash with mongo keywords. """
pass
def _es_to_optimade_result(
......@@ -161,9 +161,14 @@ class ElasticsearchStructureCollection(EntryCollection):
if include('last_modified'):
attrs['last_modified'] = entry.last_processing if entry.last_processing is not None else entry.upload_time
# TODO this should be removed, once all data is reprocessed with the right normalization
attrs['chemical_formula_reduced'] = optimade_chemical_formula_reduced(
attrs['chemical_formula_reduced'])
attrs['chemical_formula_anonymous'] = optimade_chemical_formula_anonymous(
attrs['chemical_formula_reduced'])
attrs['chemical_formula_hill'] = optimade_chemical_formula_hill(
attrs['chemical_formula_hill'])
attrs['chemical_formula_descriptive'] = attrs['chemical_formula_hill']
dimension_types = attrs['dimension_types']
if isinstance(dimension_types, int):
attrs['dimension_types'] = [1] * dimension_types + [0] * (3 - dimension_types)
......
......@@ -23,6 +23,7 @@ import ase.data
import ase.formula
from string import ascii_uppercase
import pint.quantity
from collections import OrderedDict
from nomad.normalizing.normalizer import SystemBasedNormalizer
from nomad.units import ureg
......@@ -50,6 +51,43 @@ def optimade_chemical_formula_reduced(formula: str):
return formula
def optimade_chemical_formula_anonymous(formula: str):
    '''
    Returns the anonymous version of the given chemical formula.

    The element counts of ``formula`` are ordered from largest to smallest and
    each one is labelled with an anonymous symbol, followed by the count when
    it is greater than one (e.g. ``'Na3Cl2H'`` becomes ``'A3B2C'``).

    The symbols run ``A, B, ..., Z`` and then continue with
    ``Aa, Ba, ..., Za, Ab, Bb, ...``, so formulas with more than 26 distinct
    elements still get an anonymous formula.

    Arguments:
        formula: A chemical formula string that ``ase.formula.Formula`` can
            parse, or ``None``.

    Returns:
        The anonymous formula. ``formula`` itself is returned unchanged if it
        is ``None`` or if it cannot be processed.
    '''
    if formula is None:
        return formula

    try:
        counts = sorted(ase.formula.Formula(formula).count().values(), reverse=True)
        parts = []
        for index, element_count in enumerate(counts):
            # Past 'Z' the sequence continues with a lowercase suffix:
            # index 26 -> 'Aa', 27 -> 'Ba', ..., 52 -> 'Ab', ...
            cycle, position = divmod(index, len(ascii_uppercase))
            symbol = ascii_uppercase[position]
            if cycle > 0:
                symbol += ascii_uppercase[cycle - 1].lower()
            parts.append(symbol)
            if element_count > 1:
                parts.append(str(element_count))
        return ''.join(parts)
    except Exception:
        # Best effort: keep the input as is when it cannot be converted.
        return formula
def optimade_chemical_formula_hill(formula: str):
    '''
    Returns the given chemical formula with its elements in Hill order.

    If the formula contains carbon, ``C`` comes first, followed by ``H`` (when
    present), followed by all remaining elements in alphabetical order. Without
    carbon, all elements (including ``H``) are ordered alphabetically. Counts
    are only written when greater than one (e.g. ``'NaClHC'`` becomes
    ``'CHClNa'``).

    Arguments:
        formula: A chemical formula string that ``ase.formula.Formula`` can
            parse, or ``None``.

    Returns:
        The formula in Hill order. ``formula`` itself is returned unchanged if
        it is ``None`` or if it cannot be processed.
    '''
    if formula is None:
        return formula

    try:
        counts = ase.formula.Formula(formula).count()

        # Carbon and hydrogen only get special treatment when carbon is present.
        leading = []
        if 'C' in counts:
            leading = [symbol for symbol in ('C', 'H') if symbol in counts]
        trailing = sorted(symbol for symbol in counts if symbol not in leading)

        parts = []
        for symbol in leading + trailing:
            amount = counts[symbol]
            parts.append(symbol + str(amount) if amount > 1 else symbol)
        return ''.join(parts)
    except Exception:
        return formula
class OptimadeNormalizer(SystemBasedNormalizer):
'''
......@@ -113,14 +151,11 @@ class OptimadeNormalizer(SystemBasedNormalizer):
# formulas
optimade.chemical_formula_reduced = optimade_chemical_formula_reduced(
get_value(section_system.chemical_composition_reduced))
optimade.chemical_formula_hill = get_value(section_system.chemical_composition_bulk_reduced)
optimade.chemical_formula_hill = optimade_chemical_formula_hill(
get_value(section_system.chemical_composition))
optimade.chemical_formula_descriptive = optimade.chemical_formula_hill
optimade.chemical_formula_anonymous = ''
for i in range(len(optimade.elements)):
part = '%s' % ascii_uppercase[i % len(ascii_uppercase)]
if atom_counts[optimade.elements[i]] > 1:
part += str(atom_counts[optimade.elements[i]])
optimade.chemical_formula_anonymous += part
optimade.chemical_formula_anonymous = optimade_chemical_formula_anonymous(
optimade.chemical_formula_reduced)
# sites
optimade.nsites = len(nomad_species)
......
......@@ -112,7 +112,7 @@ def example_structures(elastic_infra, mongo_infra, raw_files_infra):
('NOT chemical_formula_descriptive ENDS WITH "1"', 4),
('chemical_formula_descriptive CONTAINS "C" AND NOT chemical_formula_descriptive STARTS WITH "O"', 1),
('NOT chemical_formula_anonymous STARTS WITH "A"', 0),
('chemical_formula_anonymous CONTAINS "AB2" AND chemical_formula_anonymous ENDS WITH "C"', 1),
('chemical_formula_anonymous CONTAINS "A2B" AND chemical_formula_anonymous ENDS WITH "C"', 1),
('nsites >=3 AND elements LENGTH = 2', 2),
('elements LENGTH = 2', 3),
('elements LENGTH 2', 3),
......
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import pytest
from nomad.normalizing import optimade
@pytest.mark.parametrize('formula, expected', [
    ('NaClHC', 'CHClNa'),
    ('NaClH2', 'ClH2Na'),
])
def test_chemical_formula_hill(formula, expected):
    ''' Hill formula: C and H lead when carbon is present, otherwise alphabetical. '''
    actual = optimade.optimade_chemical_formula_hill(formula)
    assert actual == expected
@pytest.mark.parametrize('formula, expected', [
    ('Na3Cl2H', 'A3B2C'),
    ('NaNaNaClClHH', 'A3B2C2'),
])
def test_chemical_formula_anonymous(formula, expected):
    ''' Anonymous formula: counts in descending order, labelled A, B, C, ... '''
    actual = optimade.optimade_chemical_formula_anonymous(formula)
    assert actual == expected
@pytest.mark.parametrize('formula, expected', [
    ('Na3Cl2H', 'Cl2HNa3'),
    ('NaNaNaClClHH', 'Cl2H2Na3'),
])
def test_chemical_formula_reduced(formula, expected):
    ''' Reduced formula: elements in alphabetical order with summed counts. '''
    actual = optimade.optimade_chemical_formula_reduced(formula)
    assert actual == expected
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment