diff --git a/nomad/parsing/file_parser/file_parser.py b/nomad/parsing/file_parser/file_parser.py
index 7ecaf7511d46807185df539a495c67a783f2dbc2..a9dd56b443c5e55b1be6e99b55fd45c0d3bcaf40 100644
--- a/nomad/parsing/file_parser/file_parser.py
+++ b/nomad/parsing/file_parser/file_parser.py
@@ -20,6 +20,7 @@ import gzip
 import bz2
 import lzma
 import tarfile
+from contextlib import contextmanager
 
 from nomad.metainfo import MSection, SubSection
 from nomad.utils import get_logger
@@ -96,6 +97,22 @@ class FileParser(ABC):
 
         return self._mainfile_obj
 
+    @contextmanager
+    def open_mainfile_obj(self):
+        """
+        Opens the mainfile and yields the file object, closing it on exit.
+
+        Yields None if the mainfile could not be opened. Exceptions raised inside
+        the with block propagate to the caller.
+        """
+        self._mainfile_obj = self.open(self._mainfile)
+        try:
+            yield self._mainfile_obj
+        finally:
+            if self._mainfile_obj is not None:
+                self._mainfile_obj.close()
+            self._mainfile_obj = None
+
     @property
     def mainfile(self):
         """
@@ -138,7 +155,13 @@ class FileParser(ABC):
             open_file = tarfile.open
         else:
             open_file = open
-        return open_file(mainfile)
+
+        try:
+            return open_file(mainfile)
+        except Exception as e:
+            # best effort: report the failure and signal it by returning None
+            get_logger(__name__).warning('failed to open file', exc_info=e)
+            return None
 
     def get(
         self,
@@ -223,6 +246,31 @@ class FileParser(ABC):
             string += f'--> {len(results)} parsed quantities ({", ".join(results[:5])}{", ..." if len(results) > 5 else ""})'
         return string
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def close(self):
+        """
+        Closes the mainfile object and, if it can be closed, the file handler.
+        """
+        try:
+            if self._mainfile_obj is not None:
+                self._mainfile_obj.close()
+        finally:
+            # drop the closed handle so it is not handed out again
+            self._mainfile_obj = None
+            # the file handler is not always closeable (it may be a parsed object
+            # or a bytes placeholder), so only close it if it supports it
+            close_handler = getattr(self._file_handler, 'close', None)
+            if callable(close_handler):
+                try:
+                    close_handler()
+                except Exception:
+                    pass
+
 
 class Parser(ABC):
     mainfile: str = None
diff --git a/nomad/parsing/file_parser/text_parser.py b/nomad/parsing/file_parser/text_parser.py
index af9d387c9dce8621ee6f00fb324a3c0e22cfc741..aa151d726523e7ec66c9bfc812ed845b68ec2faf 100644
--- a/nomad/parsing/file_parser/text_parser.py
+++ b/nomad/parsing/file_parser/text_parser.py
@@ -559,15 +559,17 @@ class TextParser(FileParser):
                     if quantity.sub_parser is not None:
                         self._parse_quantity(quantity)
 
-            # free up memory
-            self._file_handler = b' '
-
         else:
             for quantity in self._quantities:
                 if quantity.name == key or key is None:
                     if quantity.name not in self._results:
                         self._parse_quantity(quantity)
 
+        # free up memory
+        if isinstance(self._file_handler, mmap.mmap) and self.findall:
+            self._file_handler.close()
+            self._file_handler = b' '
+
         return self
 
     def clear(self):
@@ -603,7 +605,10 @@ class DataTextParser(TextParser):
                 data = np.loadtxt(self.mainfile)
             else:
                 if not self._mainfile_contents and self.mainfile_obj:
-                    self._mainfile_contents = self.mainfile_obj.read()
+                    with self.mainfile_obj as mainfile_obj:
+                        self._mainfile_contents = mainfile_obj.read()
+                    # the with block closed the file, do not keep the stale handle
+                    self._mainfile_obj = None
                 if self._mainfile_contents:
                     buffer = self._mainfile_contents
                     if isinstance(buffer, str):
diff --git a/nomad/parsing/file_parser/vasp_parser.py b/nomad/parsing/file_parser/vasp_parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..adae8590087bdade00b55a0a8620b27a048afb4f
--- /dev/null
+++ b/nomad/parsing/file_parser/vasp_parser.py
@@ -0,0 +1,105 @@
+"""
+Schema sections annotated with XML mappings for VASP and the parser operators they use.
+"""
+
+import numpy as np
+
+import runschema
+from nomad.parsing.file_parser.mapping_parser import MappingAnnotationModel, XMLParser
+
+
+class Program(runschema.run.Program):
+    version = runschema.run.Program.version
+    version.m_annotations = dict(
+        xml=MappingAnnotationModel(
+            # path=[".i[?_name=='version']"],
+            operator=(
+                'get_version',
+                [
+                    ".i[?_name=='version']",
+                    ".i[?_name=='subversion']",
+                    ".i[?_name=='platform']",
+                ],
+            ),
+        )
+    )
+
+    # compilation_datetime = runschema.run.Program.compilation_datetime
+    # compilation_datetime.m_annotations = dict(
+    #     xml=XMLAnnotation(operator=[('get_compilation_datetime', ["i[?_name=='']"])])
+    # )
+
+
+class BandEnergies(runschema.calculation.BandEnergies):
+    n_spin_channels = runschema.calculation.BandEnergies.n_spin_channels
+    n_spin_channels.m_annotations = dict(
+        xml=MappingAnnotationModel(path='length(.array.set.set)')
+    )
+
+    n_kpoints = runschema.calculation.BandEnergies.n_kpoints
+    n_kpoints.m_annotations = dict(
+        xml=MappingAnnotationModel(path='length(.array.set.set[0].set)')
+    )
+
+    energies = runschema.calculation.BandEnergies.energies
+    energies.m_annotations = dict(
+        xml=MappingAnnotationModel(
+            operator=(
+                'get_eigenvalues_energies',
+                [
+                    '.array.set.set[].set[].r',
+                    'length(.array.set.set)',
+                    'length(.array.set.set[0].set)',
+                ],
+            ),
+        )
+    )
+
+
+class Calculation(runschema.calculation.Calculation):
+    eigenvalues = runschema.calculation.Calculation.eigenvalues
+    eigenvalues.m_annotations = dict(xml=MappingAnnotationModel(path='.eigenvalues'))
+
+
+class Run(runschema.run.Run):
+    program = runschema.run.Run.program
+    program.m_annotations = dict(xml=MappingAnnotationModel(path='.generator'))
+
+    calculation = runschema.run.Run.calculation
+    calculation.m_annotations = dict(xml=MappingAnnotationModel(path='.calculation'))
+
+
+runschema.run.Run.m_def.m_annotations = dict(
+    xml=MappingAnnotationModel(path='modeling')
+)
+
+
+class VASPXMLParser(XMLParser):
+    """
+    XML parser providing the operators referenced by the VASP mapping annotations.
+    """
+
+    @staticmethod
+    def get_eigenvalues_energies(value, n_spin, n_kpoints):
+        """
+        Selects index 0 along the last axis of value and reshapes the result to
+        (n_spin, n_kpoints, n), with n the length of the first row of the selection.
+        """
+        array = np.transpose(value)[0].T
+        return np.reshape(array, (n_spin, n_kpoints, len(array[0])))
+
+    @staticmethod
+    def get_version(version, sub_version, platform):
+        """
+        Joins the version parts into one whitespace-normalized string, skipping
+        parts that are missing or empty.
+        """
+        parts = (' '.join(s.split()) for s in (version, sub_version, platform) if s)
+        return ' '.join(part for part in parts if part)
+
+    @staticmethod
+    def slice(value):
+        """
+        Returns value as an array without its first two entries.
+        """
+        return np.array(value)[2:]
diff --git a/nomad/parsing/file_parser/xml_parser.py b/nomad/parsing/file_parser/xml_parser.py
index c2fd2d2958b5c505a556b6fe256f4cac61e48a6e..e1fecc6b3f32a359ad4c3ca96806cd56731ab4d8 100644
--- a/nomad/parsing/file_parser/xml_parser.py
+++ b/nomad/parsing/file_parser/xml_parser.py
@@ -58,21 +58,10 @@ class XMLParser(FileParser):
             # I cannot use the lxml XMLParser directly because it is not compatible with
             # the ElementTree implementation.
             xml = etree.parse(
-                self.mainfile_mainfile_obj, parser=etree.XMLParser(recover=True)
-            )
-            self._file_handler = ElementTree.fromstring(etree.tostring(xml))
-        except Exception:
-            pass
-
-        self.logger.error('failed to load xml file')
-        try:
-            # I cannot use the lxml XMLParser directly because it is not compatible with
-            # the ElementTree implementation.
-            xml = etree.parse(
-                self.open(self.mainfile), parser=etree.XMLParser(recover=True)
+                self.mainfile_obj, parser=etree.XMLParser(recover=True)
             )
             self._file_handler = ElementTree.fromstring(etree.tostring(xml))
         except Exception:
-            pass
+            self.logger.error('failed to load xml file')
 
         self.init_parameters()