Commit 778ee19e authored by Ask Hjorth Larsen
code for properly parsing arrays

parent 58d41be1
...@@ -3,10 +3,13 @@ import os ...@@ -3,10 +3,13 @@ import os
import sys import sys
import setup_paths import setup_paths
import numpy as np
from import chemical_symbols
from nomadcore.simple_parser import mainFunction, SimpleMatcher as SM from nomadcore.simple_parser import mainFunction, SimpleMatcher as SM
from nomadcore.local_meta_info import loadJsonFile, InfoKindEl from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
from nomadcore.unit_conversion.unit_conversion \ from nomadcore.unit_conversion.unit_conversion \
import register_userdefined_quantity import register_userdefined_quantity, convert_unit
from util import floating from util import floating
...@@ -23,6 +26,77 @@ def siesta_energy(title, meta): ...@@ -23,6 +26,77 @@ def siesta_energy(title, meta):
return SM(r'siesta:\s*%s\s*=\s*(?P<%s__eV>\S*)' % (title, meta), return SM(r'siesta:\s*%s\s*=\s*(?P<%s__eV>\S*)' % (title, meta),
name=meta) name=meta)
def array_matcher():
    """Unfinished placeholder that only defines an inner factory.

    The nested ``getarray`` is never called or returned, so calling
    ``array_matcher`` yields ``None``.

    NOTE(review): the nesting was reconstructed from text without
    indentation -- confirm against the real file.
    """
    def getarray():
        # Create a matcher without any arguments and hand it back.
        return SM()
#class ArrayParser(SM):
# def __init__(self, *args, **kwargs):
# SM.__init__(self, *args, adHoc=get_ar**kwargs)
#def array_line_matcher(*args, **kwargs):
# def
# sm = SimpleMatcher(*args, repeats=True, **kwargs)
# return sm
def get_positions_and_labels(parser):
    """Read consecutive ``siesta:`` rows and store positions and labels.

    Lines are pulled from ``parser.fIn`` for as long as they start with
    ``siesta:``.  Each row is split on whitespace; fields 1-3 are read as
    the position and field 5 as the index into ``chemical_symbols``.

    The results are written to ``parser.backend.superBackend``:
    ``atom_positions`` receives ``convert_unit(positions, 'bohr')`` and
    ``atom_labels`` receives the array of symbols.

    The first line that does not start with ``siesta:`` ends the loop; it
    has already been read from ``parser.fIn`` and is not put back.
    """
    positions = []
    atomic_numbers = []
    line = parser.fIn.readline()
    while line.startswith('siesta:'):
        tokens = line.split()
        positions.append([float(x) for x in tokens[1:4]])
        # This list used to stay empty, which made ``atom_labels`` an
        # empty array no matter how many rows were read.
        # NOTE(review): field 5 mirrors add_positions_and_labels --
        # confirm that this column really holds the atomic number.
        atomic_numbers.append(int(tokens[5]))
        line = parser.fIn.readline()

    positions = np.array(positions, float)
    labels = np.array([chemical_symbols[Z] for Z in atomic_numbers])

    backend = parser.backend.superBackend
    backend.addArrayValues('atom_positions', convert_unit(positions, 'bohr'))
    backend.addArrayValues('atom_labels', labels)
#def get_cell(parser):
def ArraySM(header, row, end, build):
    """Create a matcher that collects table rows and builds an array.

    Args:
        header: regular expression for the line that opens the table.
        row: regular expression for one table row; matched repeatedly.
        end: regular expression for the line that closes the table.
        build: callable invoked as ``build(backend, lines)`` with
            ``parser.backend.superBackend`` and the list of collected
            row strings.

    Returns:
        The ``SM`` for ``header`` carrying the row, end and build
        matchers as its sub-matchers.
    """
    lines = []

    def addrow(parser):
        # Keep the line that was read; it used to be thrown away, so
        # ``build`` always received an empty list.
        # NOTE(review): assumes readline() in an adHoc hook returns the
        # row belonging to this match -- confirm against nomadcore.
        lines.append(parser.fIn.readline())

    def _build_array(parser):
        # Hand over a copy and reset the shared buffer first, so rows from
        # one table never leak into the next table matched by this object.
        collected = list(lines)
        del lines[:]
        build(parser.backend.superBackend, collected)

    # Children go through ``subMatchers=`` like the other matchers in this
    # file, and the outer call is now properly closed.
    sm = SM(header,
            subMatchers=[
                SM(row, name='array', repeats=True,
                   adHoc=addrow, required=True),
                SM(end, name='endarray', required=True),
                SM(r'', adHoc=_build_array, name='dummy', forwardMatch=True),
            ])
    return sm
def build_cell(backend, lines):
    """Convert text rows into a float array and store it as the cell.

    Every entry of ``lines`` is split on whitespace and each field is
    converted to ``float``.  The resulting array is passed through
    ``convert_unit(..., 'angstrom')`` and written to ``backend`` under
    ``simulation_cell``.
    """
    vectors = []
    for text in lines:
        vectors.append([float(field) for field in text.split()])
    cell = np.array(vectors)
    backend.addArrayValues('simulation_cell', convert_unit(cell, 'angstrom'))
def add_positions_and_labels(backend, lines):
    """Store atom positions and labels parsed from text rows.

    Every entry of ``lines`` is split on whitespace into one table row.
    Columns 1-3 are converted to ``float`` and written to ``backend`` as
    ``atom_positions`` after ``convert_unit(..., 'bohr')``.  Column 5 is
    converted to ``int`` and used to index ``chemical_symbols``; the
    resulting symbols are written as ``atom_labels``.
    """
    table = np.array([entry.split() for entry in lines], object)

    coords = table[:, 1:4].astype(float)
    indices = table[:, 5].astype(int)
    symbols = np.array([chemical_symbols[i] for i in indices])

    backend.addArrayValues('atom_positions', convert_unit(coords, 'bohr'))
    backend.addArrayValues('atom_labels', symbols)
infoFileDescription = SM( infoFileDescription = SM(
name='root', name='root',
weak=True, weak=True,
...@@ -33,11 +107,15 @@ infoFileDescription = SM( ...@@ -33,11 +107,15 @@ infoFileDescription = SM(
subMatchers=[ subMatchers=[
SM(r'Siesta Version: (?P<program_name>siesta)-(?P<program_version>\S*)', SM(r'Siesta Version: (?P<program_name>siesta)-(?P<program_version>\S*)',
name='name&version'), name='name&version'),
SM(r'outcell: Unit cell vectors \(Ang\):', name='cell_header'), ArraySM(r'siesta: Atomic coordinates \(Bohr\) and species',
r'siesta:', r'', add_positions_and_labels),
SM(r'\s*Single-point calculation', SM(r'\s*Single-point calculation',
name='singleconfig', name='singleconfig',
sections=['section_single_configuration_calculation'], sections=['section_single_configuration_calculation'],
subMatchers=[ subMatchers=[
ArraySM(r'outcell: Unit cell vectors \(Ang\):',
r'\s*', build_cell),
SM(r'siesta: Final energy \(eV\):', SM(r'siesta: Final energy \(eV\):',
name='energy_header', name='energy_header',
subMatchers=[ subMatchers=[
