Commit 09970928 authored by Markus Scheidgen

Added aims parser and normalizer. Refactored test data.

parent 89e2c6bd
......@@ -58,13 +58,13 @@
]
},
{
"name": "Python: tests/test_parsing.py::test_exciting_parser",
"name": "Python: tests/test_parsing.py fhi",
"type": "python",
"request": "launch",
"cwd": "${workspaceFolder}",
"program": "${workspaceFolder}/.pyenv/bin/pytest",
"args": [
"-sv", "tests/test_parsing.py::test_exciting_parser"
"-sv", "tests/test_parsing.py::test_parser[parsers/fhi-aims-.dependencies/parsers/fhi-aims/test/examples/Au2_non-periodic_geometry_optimization.out]"
]
},
{
......
......@@ -164,6 +164,10 @@ dependencies = [
name='parsers/exciting',
git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/parser-exciting.git',
git_commit='nomad-xt'),
PythonGit(
name='parsers/fhi-aims',
git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/parser-fhi-aims.git',
git_commit='master'),
PythonGit(
name='normalizers/stats',
git_url='https://gitlab.mpcdf.mpg.de/nomad-lab/normalizer-stats.git',
......
......@@ -17,6 +17,7 @@ from .normalizer import Normalizer
from .system import SystemNormalizer
from .symmetry import SymmetryNormalizer
from .systemtype import SystemTypeNormalizer
from .fhiaims import FhiAimsBaseNormalizer
"""
After parsing calculations have to be normalized with a set of *normalizers*.
......@@ -28,6 +29,7 @@ In NOMAD-coe those were programmed in python (we'll reuse) and scala (we'll rewr
# instead of classes.
normalizers: List[Any] = [
SystemNormalizer,
FhiAimsBaseNormalizer,
SymmetryNormalizer,
SystemTypeNormalizer
]
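# A minimal sketch of how this chain is applied to a parsed backend; it mirrors
# run_normalize in tests/test_normalizing.py further down in this commit:
#
#     from nomad.normalizing import normalizers
#
#     for normalizer_class in normalizers:
#         normalizer = normalizer_class(backend)
#         normalizer.normalize()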
# Copyright 2018 Fawzi Mohamed, Danio Brambila, Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import glob
import os.path
import json
import numpy as np
from nomad.normalizing.normalizer import Normalizer
controlIn_basis_set = 'x_fhi_aims_section_controlIn_basis_set'
controlIn_basis_func = 'x_fhi_aims_section_controlIn_basis_func'
controlIn_nucleus = 'x_fhi_aims_controlIn_nucleus'
pure_types_json = dict()
# glob relative to this module's directory, not the module file itself
files = glob.glob(os.path.join(os.path.dirname(__file__), "data/*.json"))
for file in files:
pure_types_str = os.path.basename(file)
with open(file) as data_file:
json_data = json.load(data_file)
section_method = json_data['sections']['section_run-0']['sections']['section_method-0']
pure_types_json[pure_types_str] = section_method[controlIn_basis_set]
class FhiAimsBaseNormalizer(Normalizer):
# Returns 0 if val is contained in values, 1 otherwise.
def compare_val_list(self, val, values):
if val in values:
return 0
return 1
# Compares dict d1 against each dict in the list d2; returns 0 if d1 fully
# matches at least one entry, 1 otherwise.
def compare_dict_dict(self, d1, d2):
sum2 = np.zeros(len(d2))
# Loop over the entries of the list d2
for k in np.arange(0, len(d2)):
# Loop over the items of d1, comparing each against d2[k]
for idx, val in d1.items():
# Excludes the keys that are always different.
if (idx not in ["gIndex", "references", "uri"]):
try:
if (val != d2[k][idx]):
sum2[k] = sum2[k] + 1
except KeyError:  # this exception arises if the cut-off potential is not a number
continue
if (min(sum2) == 0):
return 0
else:
return 1 # sum(sum2)
def compare_to_defaults(self, dict2_default, dict1):
# first compare the integration grid
false_hits_integration_grid = 0
false_hits_basis = 0
for key in dict1:
if key not in ['gIndex', 'uri', controlIn_basis_func]:
if np.size(dict1[key]) == 1:
if(dict1[key] != dict2_default[key]):
false_hits_integration_grid += 1
false_hits_integration_grid += abs(np.size(dict1[key]) - np.size(dict2_default[key]))
if np.size(dict1[key]) > 1:
for i in dict1[key]:
false_hits_integration_grid += self.compare_val_list(i, dict2_default[key])
false_hits_integration_grid += abs(np.size(dict1[key]) - np.size(dict2_default[key]))
elif (key == controlIn_basis_func):
for i in np.arange(0, len(dict1[key])):
false_hits_basis += self.compare_dict_dict(
dict1[key][i], dict2_default[key])
false_hits_basis += abs(len(dict1[key]) - len(dict2_default[key]))
return [false_hits_integration_grid, false_hits_basis]
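# Example with hypothetical counts: if a species' integration grid matches a
# default exactly but two of its basis functions differ, this returns [0, 2];
# an exact match of both grid and basis returns [0, 0].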
def normalize(self) -> None:
for index in self._backend.get_sections('section_method'):
to_compare = self._backend.get_value(controlIn_basis_set, index)
if to_compare is None:
# not fhi aims data
continue
matrix_hits_int = dict.fromkeys(pure_types_json, 0)
matrix_hits_basis = dict.fromkeys(pure_types_json, 0)
# use a distinct loop variable so the outer section_method index, needed
# for context_uri below, is not shadowed
for i, data in enumerate(to_compare):
atom_index = int(data[controlIn_nucleus])
for key, val in pure_types_json.items():
results = self.compare_to_defaults(val[atom_index], data)
matrix_hits_int[key] += results[0]
matrix_hits_basis[key] += results[1]
context_uri = '/section_run/0/section_method/%d' % index
self._backend.openContext(context_uri)
closest_base_int = min(matrix_hits_int, key=matrix_hits_int.get)
# append '+' when no default basis set matches the parsed basis functions exactly
if (matrix_hits_basis[min(matrix_hits_basis, key=matrix_hits_basis.get)] == 0):
closest_base_base = ''
else:
closest_base_base = '+'
if (matrix_hits_int[closest_base_int] == 0):
# print(closest_base_int +closest_base_base)
self._backend.addValue('basis_set', closest_base_int + closest_base_base)
elif(matrix_hits_int[closest_base_int] <= 5):
# print('~'+closest_base_int+closest_base_base)
self._backend.addValue('basis_set', '~' + closest_base_int + closest_base_base)
elif(matrix_hits_int[closest_base_int] > 5):
self._backend.addValue('basis_set', 'custom-' + closest_base_int)
# print('custom-'+closest_base_int)
self._backend.closeContext(context_uri)
self._backend.finishedParsingSession("ParseSuccess", None)
# Copyright 2018 Markus Scheidgen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from abc import ABCMeta, abstractmethod
from typing import List, Dict, Any
......@@ -6,13 +20,23 @@ from nomad.utils import get_logger
logger = get_logger(__name__)
s_system = 'section_system'
s_scc = 'section_single_configuration_calculation'
s_frame_sequence = 'section_frame_sequence'
r_scc_to_system = 'single_configuration_calculation_to_system_ref'
r_frame_sequence_local_frames = 'frame_sequence_local_frames_ref'
class Normalizer(metaclass=ABCMeta):
"""
A base class for normalizers. Normalizers read from and write to an
:class:`AbstractParserBackend` instance.
Arguments:
backend: the backend used to read and write data from and to
"""
def __init__(self, backend: AbstractParserBackend) -> None:
self._backend = backend
@abstractmethod
......@@ -21,6 +45,20 @@ class Normalizer(metaclass=ABCMeta):
class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
"""
A normalizer base class for normalizers that only touch a section_system.
The normalizer runs either on all section_system instances, or only on systems
linked to a section_single_configuration_calculation; where frame sequences
exist, only the scc of each sequence's last frame is considered.
Arguments:
all_sections: apply normalizer to all section_system instances or only the
last single config calc of the last frame sequence
"""
def __init__(self, backend: AbstractParserBackend, all_sections=True) -> None:
super().__init__(backend=backend)
self._all_sections = all_sections
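# A typical subclass just fixes `all_sections` and implements the per-system
# hook; this sketch mirrors the SymmetryNormalizer below:
#
#     class MyNormalizer(SystemBasedNormalizer):
#         def __init__(self, backend):
#             super().__init__(backend, all_sections=True)
#
#         def normalize_system(self, section_system) -> None:
#             ...  # read and write through self._backend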
@property
def quantities(self) -> List[str]:
......@@ -41,7 +79,7 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
try:
input_data[quantity] = self._backend.get_value(quantity, g_index)
except KeyError:
# onyl fail when the normalizer actually uses the respecitive value
# only fail when the normalizer actually uses the respective value
pass
context = input_data['uri']
......@@ -56,7 +94,30 @@ class SystemBasedNormalizer(Normalizer, metaclass=ABCMeta):
pass
def normalize(self) -> None:
for g_index in self._backend.get_sections('section_system'):
if self._all_sections:
systems = self._backend.get_sections(s_system)
else:
# look for sccs in last frames
sccs = []
for frame_seq in self._backend.get_sections(s_frame_sequence):
frames = self._backend.get_value(r_frame_sequence_local_frames, frame_seq)
if len(frames) > 0:
sccs.append(frames[-1])
# no sccs from frames -> consider all sccs
if len(sccs) == 0:
sccs = self._backend.get_sections(s_scc)
systems = [self._backend.get_value(r_scc_to_system, scc) for scc in sccs]
# no systems resolved from sccs -> consider all systems
if len(systems) == 0:
systems = self._backend.get_sections(s_system)
# only take the first and the last two systems
if len(systems) > 2:
systems = [systems[0], systems[-2], systems[-1]]
for g_index in systems:
try:
self._normalize_system(g_index)
except KeyError as e:
......
......@@ -20,6 +20,8 @@ class SymmetryNormalizer(SystemBasedNormalizer):
"""
This is basically a copy of the legacy NOMAD-coe symmetry normalizer.
"""
def __init__(self, backend):
super().__init__(backend, all_sections=True)
def normalize_system(self, section_system) -> None:
normalize(self._backend, section_system)
......@@ -64,7 +64,7 @@ class SystemNormalizer(SystemBasedNormalizer):
else:
formula_bulk = formula
cell = section_system['simulation_cell']
cell = section_system.get('simulation_cell', None)
if cell is not None:
results['lattice_vectors'] = cell
......@@ -77,6 +77,7 @@ class SystemNormalizer(SystemBasedNormalizer):
if periodic_dirs is not None:
results['configuration_periodic_dimensions'] = periodic_dirs.tolist()
symm = None
configuration_id = 's' + addShasOfJson(results).b64digests()[0][0:28]
if cell is not None and atom_labels is not None:
if cell is not None:
......@@ -116,7 +117,7 @@ class SystemNormalizer(SystemBasedNormalizer):
self._backend.addValue("chemical_composition_reduced", formula_reduced)
self._backend.addValue("chemical_composition_bulk_reduced", formula_bulk)
if symm:
if symm is not None:
# for quantity in ["number", "international", "hall", "choice", "pointgroup"]:
# v = symm.get(quantity)
# if v is not None:
......
......@@ -17,6 +17,8 @@ from systemtypenormalizer.classify_structure import ClassifyStructure
class SystemTypeNormalizer(SystemBasedNormalizer):
def __init__(self, backend):
super().__init__(backend, all_sections=True)
def normalize_system(self, section_system) -> None:
structure = ClassifyStructure(section_system)
......
......@@ -48,9 +48,13 @@ Parsers in NOMAD-coe use a *backend* to create output.
from typing import TextIO, Tuple, List, Any, Callable, IO
from abc import ABCMeta, abstractmethod
from io import StringIO
import sys
import json
import re
import importlib
import inspect
from unittest.mock import patch
import io
from nomadcore.local_backend import LocalBackend as LegacyLocalBackend
from nomadcore.local_backend import Section, Results
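# Sketch of the backend write interface as the normalizers in this commit use
# it (the values here are illustrative; see FhiAimsBaseNormalizer.normalize above):
#
#     backend.openContext('/section_run/0/section_method/0')
#     backend.addValue('basis_set', 'tight')
#     backend.closeContext('/section_run/0/section_method/0')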
......@@ -364,7 +368,9 @@ class LocalBackend(LegacyParserBackend):
self._open_context: Tuple[str, int] = None
self._context_section = None
# things that have no real purpose, but are required by some legacy code
self._unknown_attributes = {}
self.fileOut = io.StringIO()
def __getattr__(self, name):
""" Support for unimplemented and unexpected methods. """
......@@ -621,8 +627,17 @@ class Parser():
parser_class = self.parser_class_name.split('.')[1]
module = importlib.import_module('.'.join(module_name))
Parser = getattr(module, parser_class)
parser = Parser(backend=create_backend, debug=True)
backend = parser.parse(mainfile)
init_signature = inspect.getargspec(Parser.__init__)
kwargs = dict(
backend=create_backend,
mainfile=mainfile, main_file=mainfile,
debug=True)
kwargs = {key: value for key, value in kwargs.items() if key in init_signature.args}
parser = Parser(**kwargs)
with patch.object(sys, 'argv', []):
backend = parser.parse(mainfile)
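# Rationale (inferred): parser classes differ in their constructor signatures
# (some expect `backend`, others `mainfile` or `main_file`), so the candidate
# kwargs are filtered against the actual __init__ signature; sys.argv is
# patched empty because some legacy parsers read command line arguments while
# parsing.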
# TODO we need a homogeneous interface to parsers, but we don't have it right now;
# this is a hack to distinguish between ParserInterface parsers and simple_parser
......@@ -656,6 +671,14 @@ parsers = [
r'\s*\|\s*EXCITING\s+\S+\s+started\s*='
r'\s*\|\s*version hash id:\s*\S*\s*=')
),
Parser(
python_git=dependencies['parsers/fhi-aims'],
parser_class_name='fhiaimsparser.FHIaimsParser',
main_file_re=r'^.*\.out$', # TODO
main_contents_re=(
r'\s*Invoking FHI-aims \.\.\.\n'
r'\s*Version')
)
]
""" Instanciation and constructor based config of all parsers. """
......
......@@ -6,5 +6,5 @@ POST http://enc-staging-nomad.esc.rzg.mpg.de/nomadxt/api/uploads HTTP/1.1
content-type: application/json
{
"name": "RxvmhsBvB2xO7gbKi9OMa625OQoY6.zip"
"name": "RlVlpdRpKNozUtEx9dpD3E4Qc-_X4.zip"
}
......@@ -14,23 +14,33 @@
import pytest
from nomad.parsing import LocalBackend, parser_dict
from nomad.parsing import LocalBackend
from nomad.normalizing import normalizers
from tests.test_parsing import parsed_vasp_example # pylint: disable=unused-import
from tests.test_parsing import parsed_example # pylint: disable=unused-import
@pytest.fixture
def normalized_vasp_example(parsed_vasp_example: LocalBackend) -> LocalBackend:
status, _ = parsed_vasp_example.status
def run_normalize(backend: LocalBackend) -> LocalBackend:
status, _ = backend.status
assert status == 'ParseSuccess'
for normalizer_class in normalizers:
normalizer = normalizer_class(parsed_vasp_example)
normalizer = normalizer_class(backend)
normalizer.normalize()
return parsed_vasp_example
return backend
@pytest.fixture
def normalized_vasp_example(parsed_vasp_example: LocalBackend) -> LocalBackend:
return run_normalize(parsed_vasp_example)
@pytest.fixture
def normalized_example(parsed_example: LocalBackend) -> LocalBackend:
return run_normalize(parsed_example)
def assert_normalized(backend):
......@@ -43,23 +53,5 @@ def assert_normalized(backend):
assert backend.get_value('chemical_composition_bulk_reduced', 0) is not None
def test_normalizer(normalized_vasp_example: LocalBackend):
assert_normalized(normalized_vasp_example)
def test_normalizer_reproduce():
parser = 'parsers/exciting'
mainfile = '.dependencies/parsers/exciting/test/examples/Ag/INFO.OUT'
parser = parser_dict[parser]
backend = parser.run(mainfile)
status, errors = backend.status
assert status == 'ParseSuccess'
assert errors is None or len(errors) == 0
for normalizer_class in normalizers:
normalizer = normalizer_class(backend)
normalizer.normalize()
assert_normalized(backend)
def test_normalizer(normalized_example: LocalBackend):
assert_normalized(normalized_example)
......@@ -22,6 +22,13 @@ from nomadcore.local_meta_info import loadJsonFile
from nomad.parsing import JSONStreamWriter, parser_dict
from nomad.parsing import LocalBackend, BadContextURI
parser_examples = [
('parsers/exciting', '.dependencies/parsers/exciting/test/examples/Ag/INFO.OUT'),
('parsers/exciting', '.dependencies/parsers/exciting/test/examples/GW/INFO.OUT'),
('parsers/vasp', '.dependencies/parsers/vasp/test/examples/xml/perovskite.xml'),
('parsers/fhi-aims', 'tests/data/parsers/aims.out')
]
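# The launch config at the top of this commit suggests these tuples feed a
# parametrized test roughly like this sketch (the actual body is elided from
# the diff):
#
#     @pytest.mark.parametrize('parser_name, mainfile', parser_examples)
#     def test_parser(parser_name, mainfile):
#         parser = parser_dict[parser_name]
#         backend = parser.run(mainfile)
#         status, errors = backend.status
#         assert status == 'ParseSuccess'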
class TestLocalBackend(object):
......@@ -177,36 +184,32 @@ def test_stream_generator(pretty):
assert create_reference(example_data, pretty) == out.getvalue()
@pytest.fixture