Commit d7fa8427 authored by Daniel Speckhard
Browse files

Added CASTEP, repo normalizer checks multiple sections.

parent 52dfe361
Pipeline #44025 failed with stages
in 22 minutes and 18 seconds
...@@ -58,3 +58,6 @@ ...@@ -58,3 +58,6 @@
[submodule "dependencies/parsers/orca"] [submodule "dependencies/parsers/orca"]
path = dependencies/parsers/orca path = dependencies/parsers/orca
url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-orca url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-orca
[submodule "dependencies/parsers/castep"]
path = dependencies/parsers/castep
url = https://gitlab.mpcdf.mpg.de/nomad-lab/parser-castep
...@@ -157,7 +157,16 @@ ...@@ -157,7 +157,16 @@
"-sv", "tests/test_normalizing.py::test_normalizer[parsers/abinit-tests/data/parsers/abinit/Fe.out]" "-sv", "tests/test_normalizing.py::test_normalizer[parsers/abinit-tests/data/parsers/abinit/Fe.out]"
] ]
}, },
{
"name": "Castep Normalizer",
"type": "python",
"request": "launch",
"cwd": "${workspaceFolder}",
"program": "${workspaceFolder}/.pyenv/bin/pytest",
"args": [
"-sv", "tests/test_normalizing.py::test_normalizer[parsers/castep-tests/data/parsers/castep/BC2N-Pmm2-Raman.castep]"
]
},
{ {
"name": "Python: Current File", "name": "Python: Current File",
"type": "python", "type": "python",
......
Subproject commit b42810dee0ef7eb97cc5de41f4b5b110f21ab374
...@@ -52,11 +52,40 @@ class RepositoryNormalizer(Normalizer): ...@@ -52,11 +52,40 @@ class RepositoryNormalizer(Normalizer):
else: else:
return match.group(0) return match.group(0)
def get_optional_value(self, key): def get_optional_value(self, key, section):
try: # Section is section_system, section_symmetry, etc...
return self._backend.get_value(key, 0) val = None # Initialize to None, so we can compare section values.
except KeyError: diff_flag = False # Flag to store whether vals differ between sections.
# Loop over the sections with the name section in the backend.
for section_index in self._backend.get_sections(section):
try:
new_val = self._backend.get_value(key, section_index)
except KeyError:
continue
# Compare values from iterations.
diff_bool = new_val != val
if type(diff_bool) is bool:
if diff_bool and val is not None:
diff_flag = True
elif diff_bool.all() and (val is not None):
# Then we have an array, and diff bool has multiple values since
# each item in array has been compared item for item.
diff_flag = True
val = new_val
if diff_flag is True:
self.logger.warning(
'The values for %s differ between different %s' % (key, section))
if val is None:
self.logger.warning(
'The values for %s where not available in any %s' % (key, section))
return unavailable_label return unavailable_label
else:
return val
def normalize(self, logger=None) -> None: def normalize(self, logger=None) -> None:
super().normalize(logger) super().normalize(logger)
...@@ -75,19 +104,33 @@ class RepositoryNormalizer(Normalizer): ...@@ -75,19 +104,33 @@ class RepositoryNormalizer(Normalizer):
'repository_code_version', 'repository_code_version',
self.simplify_version(b.get_value('program_version', 0))) self.simplify_version(b.get_value('program_version', 0)))
b.addValue('repository_parser_id', b.get_value('parser_name', 0)) b.addValue('repository_parser_id', b.get_value('parser_name', 0))
b.addValue('repository_chemical_formula', b.get_value('chemical_composition_bulk_reduced', 0)) # We add these values as optional as some parser developers may create parsers
atom_labels = b.get_value('atom_labels', 0) # where the first section contains only properties that are constant over
# several simulations and use the other sections to define simulation specific
# parameters. E.g. in the first section_system the CASTEP parser defines the number of electrons
# and in subsequent sections it defines atom labels, positions, etc...
atom_labels = self.get_optional_value('atom_labels', 'section_system')
b.addValue('repository_atomic_elements', list(set(atom_labels))) b.addValue('repository_atomic_elements', list(set(atom_labels)))
b.addValue('repository_atomic_elements_count', len(atom_labels)) b.addValue('repository_atomic_elements_count', len(atom_labels))
b.addValue('repository_system_type', b.get_value('system_type', 0)) b.addValue(
'repository_crystal_system',
b.addValue('repository_crystal_system', self.get_optional_value('crystal_system')) self.get_optional_value('crystal_system', 'section_symmetry'))
b.addValue('repository_spacegroup_nr', self.get_optional_value('space_group_number')) b.addValue(
'repository_spacegroup_nr',
b.addValue('repository_basis_set_type', self.get_optional_value('program_basis_set_type')) self.get_optional_value('space_group_number', 'section_symmetry'))
b.addValue(
'repository_basis_set_type',
self.get_optional_value('program_basis_set_type', 'section_run'))
b.addValue(
'repository_system_type',
self.get_optional_value('system_type', 'section_system'))
b.addValue(
'repository_chemical_formula',
self.get_optional_value('chemical_composition_bulk_reduced', 'section_system'))
b.addValue( b.addValue(
'repository_xc_treatment', 'repository_xc_treatment',
self.map_functional_name_to_xc_treatment(self.get_optional_value(('XC_functional_name')))) self.map_functional_name_to_xc_treatment(
self.get_optional_value('XC_functional_name', 'section_method')))
b.closeNonOverlappingSection('section_repository_parserdata') b.closeNonOverlappingSection('section_repository_parserdata')
if repository_info_context is None: if repository_info_context is None:
......
...@@ -48,10 +48,10 @@ class SystemNormalizer(SystemBasedNormalizer): ...@@ -48,10 +48,10 @@ class SystemNormalizer(SystemBasedNormalizer):
self.atom_labels = section_system['atom_labels'] self.atom_labels = section_system['atom_labels']
self.atom_species = section_system['atom_atom_numbers'] self.atom_species = section_system['atom_atom_numbers']
self.atom_positions = section_system['atom_positions'] self.atom_positions = section_system['atom_positions']
# Assume if no config for periodic dimensions that we simply don't have any.
# TODO: @dts, this may be something we should change since many DFT programs
# have implicit periodicity. Talk with Georg/Claudia.
self.periodic_dirs = section_system.get('configuration_periodic_dimensions', None) self.periodic_dirs = section_system.get('configuration_periodic_dimensions', None)
if self.periodic_dirs is None:
self.logger.warning(
'Unable to get PBCs in this section_system, assume False, False, Fasle')
# Try to first read the cell information from the renamed metainfo # Try to first read the cell information from the renamed metainfo
# lattice_vectors, if this doesn't work try the deprecated name # lattice_vectors, if this doesn't work try the deprecated name
# simulation_cell. Otherwise, if neither are present, assign None. # simulation_cell. Otherwise, if neither are present, assign None.
...@@ -119,7 +119,6 @@ class SystemNormalizer(SystemBasedNormalizer): ...@@ -119,7 +119,6 @@ class SystemNormalizer(SystemBasedNormalizer):
self.atom_species = [ self.atom_species = [
atom_label_to_num(atom_label) for atom_label in self.atom_labels atom_label_to_num(atom_label) for atom_label in self.atom_labels
] ]
formula = None formula = None
if self.atom_species: if self.atom_species:
...@@ -133,7 +132,6 @@ class SystemNormalizer(SystemBasedNormalizer): ...@@ -133,7 +132,6 @@ class SystemNormalizer(SystemBasedNormalizer):
formula_bulk = formula_reduced formula_bulk = formula_reduced
else: else:
formula_bulk = formula formula_bulk = formula
if self.cell is not None: if self.cell is not None:
results['lattice_vectors'] = self.cell results['lattice_vectors'] = self.cell
...@@ -149,7 +147,6 @@ class SystemNormalizer(SystemBasedNormalizer): ...@@ -149,7 +147,6 @@ class SystemNormalizer(SystemBasedNormalizer):
# TODO: @dts, might be good to clean this up so it is more readable in the # TODO: @dts, might be good to clean this up so it is more readable in the
# future. # future.
configuration_id = 's' + addShasOfJson(results).b64digests()[0][0:28] configuration_id = 's' + addShasOfJson(results).b64digests()[0][0:28]
self._backend.addValue('configuration_raw_gid', configuration_id) self._backend.addValue('configuration_raw_gid', configuration_id)
self._backend.addValue('atom_species', self.atom_species) self._backend.addValue('atom_species', self.atom_species)
self._backend.addValue('chemical_composition', formula) self._backend.addValue('chemical_composition', formula)
...@@ -309,4 +306,4 @@ class SystemNormalizer(SystemBasedNormalizer): ...@@ -309,4 +306,4 @@ class SystemNormalizer(SystemBasedNormalizer):
if nomad_classification == 'Atom' and (len(self.atom_labels) > 1): if nomad_classification == 'Atom' and (len(self.atom_labels) > 1):
nomad_classification = 'Molecule / Cluster' nomad_classification = 'Molecule / Cluster'
return nomad_classification return nomad_classification
\ No newline at end of file
...@@ -269,6 +269,11 @@ parsers = [ ...@@ -269,6 +269,11 @@ parsers = [
r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*' r'\s+\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\*\**\s*'
r'\s*' r'\s*'
r'\s*--- An Ab Initio, DFT and Semiempirical electronic structure package ---\s*') r'\s*--- An Ab Initio, DFT and Semiempirical electronic structure package ---\s*')
),
LegacyParser(
name='parsers/castep',
parser_class_name='castepparser.CastepParser',
mainfile_contents_re=(r'\s\|\s*CCC\s*AA\s*SSS\s*TTTTT\s*EEEEE\s*PPPP\s*\|\s*')
) )
] ]
......
This diff is collapsed.
...@@ -43,14 +43,15 @@ parser_examples = [ ...@@ -43,14 +43,15 @@ parser_examples = [
('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'), ('parsers/gaussian', 'tests/data/parsers/gaussian/aniline.out'),
('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'), ('parsers/abinit', 'tests/data/parsers/abinit/Fe.out'),
('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'), ('parsers/quantumespresso', 'tests/data/parsers/quantum-espresso/benchmark.out'),
('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out') ('parsers/orca', 'tests/data/parsers/orca/orca3dot2706823.out'),
('parsers/castep', 'tests/data/parsers/castep/BC2N-Pmm2-Raman.castep')
] ]
faulty_unknown_one_d_matid_example = [ faulty_unknown_one_d_matid_example = [
('parsers/template', 'tests/data/normalizers/no_sim_cell_boolean_positions.json') ('parsers/template', 'tests/data/normalizers/no_sim_cell_boolean_positions.json')
] ]
correct_num_output_files = 21 correct_num_output_files = 22
class TestLocalBackend(object): class TestLocalBackend(object):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment