From a3c66ab14d2e281fa4adb82ad3ed88efdc4e32d0 Mon Sep 17 00:00:00 2001 From: "Himanen, Lauri (himanel1)" <lauri.himanen@aalto.fi> Date: Tue, 19 Jan 2016 12:21:44 +0200 Subject: [PATCH] Renamed and moved stuff, added initial support for the scala call. --- parser/parser-cp2k/cp2kparser/__init__.py | 2 - parser/parsercp2k/__init__.py | 2 + .../cp2kparser => parsercp2k}/__main__.py | 0 .../parsing => parsercp2k}/parser.py | 7 +- .../parsing/__init__.py | 0 .../parsing/autoparser.py | 4 +- .../parsing/cp2kinputenginedata/__init__.py | 0 .../cp2k_262/cp2k_input.xml | 0 .../cp2k_262/cp2k_input_tree.pickle | Bin .../cp2k_262/references.html | 0 .../cp2kinputenginedata/cp2k_262/units.html | 0 .../parsing/cp2kinputenginedata/input_tree.py | 0 .../cp2kinputenginedata/xmlpreparser.py | 0 .../parsing/csvparsing.py | 0 .../parsing/implementations.py | 418 +++++++++--------- .../parsing/inputparsing.py | 0 .../parsing/outputparsing.py | 5 +- parser/parsercp2k/setup_paths.py | 8 + .../utils/__init__.py | 0 .../utils/baseclasses.py | 81 ++-- .../utils/logconfig.py | 0 .../utils/testing.py | 0 parser/{parser-cp2k => }/setup.py | 4 +- 23 files changed, 267 insertions(+), 264 deletions(-) delete mode 100644 parser/parser-cp2k/cp2kparser/__init__.py create mode 100644 parser/parsercp2k/__init__.py rename parser/{parser-cp2k/cp2kparser => parsercp2k}/__main__.py (100%) rename parser/{parser-cp2k/cp2kparser/parsing => parsercp2k}/parser.py (96%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/parsing/__init__.py (100%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/parsing/autoparser.py (89%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/parsing/cp2kinputenginedata/__init__.py (100%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/parsing/cp2kinputenginedata/cp2k_262/cp2k_input.xml (100%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/parsing/cp2kinputenginedata/cp2k_262/cp2k_input_tree.pickle (100%) rename parser/{parser-cp2k/cp2kparser => 
parsercp2k}/parsing/cp2kinputenginedata/cp2k_262/references.html (100%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/parsing/cp2kinputenginedata/cp2k_262/units.html (100%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/parsing/cp2kinputenginedata/input_tree.py (100%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/parsing/cp2kinputenginedata/xmlpreparser.py (100%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/parsing/csvparsing.py (100%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/parsing/implementations.py (63%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/parsing/inputparsing.py (100%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/parsing/outputparsing.py (98%) create mode 100644 parser/parsercp2k/setup_paths.py rename parser/{parser-cp2k/cp2kparser => parsercp2k}/utils/__init__.py (100%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/utils/baseclasses.py (86%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/utils/logconfig.py (100%) rename parser/{parser-cp2k/cp2kparser => parsercp2k}/utils/testing.py (100%) rename parser/{parser-cp2k => }/setup.py (91%) diff --git a/parser/parser-cp2k/cp2kparser/__init__.py b/parser/parser-cp2k/cp2kparser/__init__.py deleted file mode 100644 index 0a1233a..0000000 --- a/parser/parser-cp2k/cp2kparser/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -import cp2kparser.utils.logconfig -from cp2kparser.parsing.parser import CP2KParser diff --git a/parser/parsercp2k/__init__.py b/parser/parsercp2k/__init__.py new file mode 100644 index 0000000..26705c5 --- /dev/null +++ b/parser/parsercp2k/__init__.py @@ -0,0 +1,2 @@ +import parsercp2k.utils.logconfig +from parsercp2k.parser import CP2KParser diff --git a/parser/parser-cp2k/cp2kparser/__main__.py b/parser/parsercp2k/__main__.py similarity index 100% rename from parser/parser-cp2k/cp2kparser/__main__.py rename to parser/parsercp2k/__main__.py diff --git a/parser/parser-cp2k/cp2kparser/parsing/parser.py 
b/parser/parsercp2k/parser.py similarity index 96% rename from parser/parser-cp2k/cp2kparser/parsing/parser.py rename to parser/parsercp2k/parser.py index 50a87ce..be9428e 100644 --- a/parser/parser-cp2k/cp2kparser/parsing/parser.py +++ b/parser/parsercp2k/parser.py @@ -1,7 +1,8 @@ import re import logging -from cp2kparser.utils.baseclasses import Parser -from cp2kparser.parsing.implementations import * +import parsercp2k.setup_paths +from parsercp2k.utils.baseclasses import Parser +from parsercp2k.parsing.implementations import * logger = logging.getLogger(__name__) @@ -75,7 +76,5 @@ class CP2KParser(Parser): #=============================================================================== # This is what gets run when the scala layer calls for this parser if __name__ == "__main__": - - cp2kparser = CP2KParser() cp2kparser.scala_main_function() diff --git a/parser/parser-cp2k/cp2kparser/parsing/__init__.py b/parser/parsercp2k/parsing/__init__.py similarity index 100% rename from parser/parser-cp2k/cp2kparser/parsing/__init__.py rename to parser/parsercp2k/parsing/__init__.py diff --git a/parser/parser-cp2k/cp2kparser/parsing/autoparser.py b/parser/parsercp2k/parsing/autoparser.py similarity index 89% rename from parser/parser-cp2k/cp2kparser/parsing/autoparser.py rename to parser/parsercp2k/parsing/autoparser.py index 03899bf..bebecb4 100644 --- a/parser/parser-cp2k/cp2kparser/parsing/autoparser.py +++ b/parser/parsercp2k/parsing/autoparser.py @@ -1,8 +1,8 @@ import sys import os import cStringIO -from cp2kparser.implementation.cp2kparserversioner import CP2KParserVersioner -from cp2kparser.generics.testing import get_parser +from parsercp2k.implementation.cp2kparserversioner import CP2KParserVersioner +from parsercp2k.generics.testing import get_parser #=============================================================================== diff --git a/parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/__init__.py 
b/parser/parsercp2k/parsing/cp2kinputenginedata/__init__.py similarity index 100% rename from parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/__init__.py rename to parser/parsercp2k/parsing/cp2kinputenginedata/__init__.py diff --git a/parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/cp2k_262/cp2k_input.xml b/parser/parsercp2k/parsing/cp2kinputenginedata/cp2k_262/cp2k_input.xml similarity index 100% rename from parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/cp2k_262/cp2k_input.xml rename to parser/parsercp2k/parsing/cp2kinputenginedata/cp2k_262/cp2k_input.xml diff --git a/parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/cp2k_262/cp2k_input_tree.pickle b/parser/parsercp2k/parsing/cp2kinputenginedata/cp2k_262/cp2k_input_tree.pickle similarity index 100% rename from parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/cp2k_262/cp2k_input_tree.pickle rename to parser/parsercp2k/parsing/cp2kinputenginedata/cp2k_262/cp2k_input_tree.pickle diff --git a/parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/cp2k_262/references.html b/parser/parsercp2k/parsing/cp2kinputenginedata/cp2k_262/references.html similarity index 100% rename from parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/cp2k_262/references.html rename to parser/parsercp2k/parsing/cp2kinputenginedata/cp2k_262/references.html diff --git a/parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/cp2k_262/units.html b/parser/parsercp2k/parsing/cp2kinputenginedata/cp2k_262/units.html similarity index 100% rename from parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/cp2k_262/units.html rename to parser/parsercp2k/parsing/cp2kinputenginedata/cp2k_262/units.html diff --git a/parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/input_tree.py b/parser/parsercp2k/parsing/cp2kinputenginedata/input_tree.py similarity index 100% rename from parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/input_tree.py rename to 
parser/parsercp2k/parsing/cp2kinputenginedata/input_tree.py diff --git a/parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/xmlpreparser.py b/parser/parsercp2k/parsing/cp2kinputenginedata/xmlpreparser.py similarity index 100% rename from parser/parser-cp2k/cp2kparser/parsing/cp2kinputenginedata/xmlpreparser.py rename to parser/parsercp2k/parsing/cp2kinputenginedata/xmlpreparser.py diff --git a/parser/parser-cp2k/cp2kparser/parsing/csvparsing.py b/parser/parsercp2k/parsing/csvparsing.py similarity index 100% rename from parser/parser-cp2k/cp2kparser/parsing/csvparsing.py rename to parser/parsercp2k/parsing/csvparsing.py diff --git a/parser/parser-cp2k/cp2kparser/parsing/implementations.py b/parser/parsercp2k/parsing/implementations.py similarity index 63% rename from parser/parser-cp2k/cp2kparser/parsing/implementations.py rename to parser/parsercp2k/parsing/implementations.py index 907159b..9ea3cf6 100644 --- a/parser/parser-cp2k/cp2kparser/parsing/implementations.py +++ b/parser/parsercp2k/parsing/implementations.py @@ -1,11 +1,11 @@ import re import os import logging -from cp2kparser.parsing.csvparsing import CSVParser -from cp2kparser.parsing.inputparsing import CP2KInputParser -from cp2kparser.parsing.cp2kinputenginedata.input_tree import CP2KInput -from cp2kparser.parsing.outputparsing import * -from cp2kparser.utils.baseclasses import ParserImplementation +from parsercp2k.parsing.csvparsing import CSVParser +from parsercp2k.parsing.inputparsing import CP2KInputParser +from parsercp2k.parsing.cp2kinputenginedata.input_tree import CP2KInput +from parsercp2k.parsing.outputparsing import * +from parsercp2k.utils.baseclasses import ParserImplementation from nomadcore.coordinate_reader import CoordinateReader logger = logging.getLogger(__name__) @@ -266,8 +266,6 @@ class CP2KImplementation262(ParserImplementation): results are outputted to std.out by using the backend. The scala layer will the take on from that. 
""" - # Write the starting bracket - self.backend.fileOut.write("[") # Use the SimpleMatcher to extract most of the results parserInfo = {"name": "cp2k-parser", "version": "1.0"} @@ -276,10 +274,6 @@ class CP2KImplementation262(ParserImplementation): cachingLevelForMetaName = self.outputparser.cachingLevelForMetaName self.parse_file(outputfilename, outputstructure, parserInfo, cachingLevelForMetaName, superContext=self.outputparser) - # Then extract the things that cannot be extracted by the SimpleMatcher - - # Write the ending bracket - self.backend.fileOut.write("]\n") # def _Q_energy_total(self): # """Return the total energy from the bottom of the input file""" @@ -288,211 +282,211 @@ class CP2KImplementation262(ParserImplementation): # result.value = float(self.regexengine.parse(self.regexs.energy_total, self.parser.get_file_handle("output"))) # return result - def _Q_particle_forces(self): - """Return the forces that are controlled by - "FORCE_EVAL/PRINT/FORCES/FILENAME". These forces are typicalle printed - out during optimization or single point calculation. - - Supports forces printed in the output file or in a single XYZ file. - """ - result = Result() - result.unit = "force_au" - - # Determine if a separate force file is used or are the forces printed - # in the output file. - separate_file = True - filename = self.input_tree.get_keyword("FORCE_EVAL/PRINT/FORCES/FILENAME") - if not filename or filename == "__STD_OUT__": - separate_file = False - - # Look for the forces either in output or in separate file - if not separate_file: - logger.debug("Looking for forces in output file.") - forces = self.regexengine.parse(self.regexs.particle_forces, self.parser.get_file_handle("output")) - if forces is None: - msg = "No force configurations were found when searching the output file." 
- logger.warning(msg) - result.error_message = msg - result.code = ResultCode.fail - return result - - # Insert force configuration into the array - i_conf = 0 - force_array = None - for force_conf in forces: - iterator = self.csvengine.iread(force_conf, columns=(-3, -2, -1), comments=("#", "ATOMIC", "SUM"), separator=None) - i_force_array = iterator.next() - - # Initialize the numpy array if not done yet - n_particles = i_force_array.shape[0] - n_dim = i_force_array.shape[1] - n_confs = len(forces) - force_array = np.empty((n_confs, n_particles, n_dim)) - - force_array[i_conf, :, :] = i_force_array - i_conf += 1 - - result.value_iterable = force_array - return result - else: - logger.debug("Looking for forces in separate force file.") - iterator = self.csvengine.iread(self.parser.get_file_handle("forces"), columns=(-3, -2, -1), comments=("#", "SUM"), separator=r"\ ATOMIC FORCES in \[a\.u\.\]") - result.value_iterable = iterator - return result + # def _Q_particle_forces(self): + # """Return the forces that are controlled by + # "FORCE_EVAL/PRINT/FORCES/FILENAME". These forces are typicalle printed + # out during optimization or single point calculation. - def get_initial_atom_positions_and_unit(self): - """Returns the starting configuration of the atoms in the system. 
- """ - unit = "angstrom" - - # Check where the coordinates are specified - coord_format = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/TOPOLOGY/COORD_FILE_FORMAT") - if not coord_format: - coord_format = self.input_tree.get_keyword_default("FORCE_EVAL/SUBSYS/TOPOLOGY/COORD_FILE_FORMAT") - - # See if the coordinates are provided in the input file - if coord_format == "OFF": - logger.debug("Using coordinates from the input file.") - coords = self.input_tree.get_default_keyword("FORCE_EVAL/SUBSYS/COORD") - coords = coords.strip().split('\n') - positions = [] - for line in coords: - components = [float(x) for x in line.split()[1:]] - positions.append(components) - positions = np.array(positions) - return positions, unit - - elif coord_format in ["CP2K", "G96", "XTL", "CRD"]: - msg = "Tried to read the number of atoms from the initial configuration, but the parser does not yet support the '{}' format that is used by file '{}'.".format(coord_format, self.parser.file_ids["initial_coordinates"]) - logger.warning(msg) - else: - # External file, use AtomsEngine - init_coord_file = self.parser.get_file_handle("initial_coordinates") - if coord_format == "XYZ": - iter_pos = self.atomsengine.iread(init_coord_file, format="xyz") - if coord_format == "CIF": - iter_pos = self.atomsengine.iread(init_coord_file, format="cif") - if coord_format == "PDB": - iter_pos = self.atomsengine.iread(init_coord_file, format="pdb") - return next(iter_pos), unit - - # # Check if the unit cell is multiplied programmatically - # multiples = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/TOPOLOGY/MULTIPLE_UNIT_CELL") - # if not multiples: - # multiples = self.input_tree.get_keyword_default("FORCE_EVAL/SUBSYS/TOPOLOGY/MULTIPLE_UNIT_CELL") - # factors = [int(x) for x in multiples.split()] - # factor = np.prod(np.array(factors)) + # Supports forces printed in the output file or in a single XYZ file. 
+ # """ + # result = Result() + # result.unit = "force_au" + + # # Determine if a separate force file is used or are the forces printed + # # in the output file. + # separate_file = True + # filename = self.input_tree.get_keyword("FORCE_EVAL/PRINT/FORCES/FILENAME") + # if not filename or filename == "__STD_OUT__": + # separate_file = False + + # # Look for the forces either in output or in separate file + # if not separate_file: + # logger.debug("Looking for forces in output file.") + # forces = self.regexengine.parse(self.regexs.particle_forces, self.parser.get_file_handle("output")) + # if forces is None: + # msg = "No force configurations were found when searching the output file." + # logger.warning(msg) + # result.error_message = msg + # result.code = ResultCode.fail + # return result + + # # Insert force configuration into the array + # i_conf = 0 + # force_array = None + # for force_conf in forces: + # iterator = self.csvengine.iread(force_conf, columns=(-3, -2, -1), comments=("#", "ATOMIC", "SUM"), separator=None) + # i_force_array = iterator.next() + + # # Initialize the numpy array if not done yet + # n_particles = i_force_array.shape[0] + # n_dim = i_force_array.shape[1] + # n_confs = len(forces) + # force_array = np.empty((n_confs, n_particles, n_dim)) + + # force_array[i_conf, :, :] = i_force_array + # i_conf += 1 + + # result.value_iterable = force_array + # return result + # else: + # logger.debug("Looking for forces in separate force file.") + # iterator = self.csvengine.iread(self.parser.get_file_handle("forces"), columns=(-3, -2, -1), comments=("#", "SUM"), separator=r"\ ATOMIC FORCES in \[a\.u\.\]") + # result.value_iterable = iterator + # return result - def get_atom_positions_and_unit(self): - """Returns the atom positions and unit that were calculated during the - simulation. 
- """ - # Determine the unit - unit_path = "MOTION/PRINT/TRAJECTORY/UNIT" - unit = self.input_tree.get_keyword(unit_path) - # unit = unit.lower() - unit = CP2KInput.decode_cp2k_unit(unit) - - # Read the trajectory - traj_file = self.get_file_handle("trajectory", show_warning=False) - if not traj_file: - logger.debug("No trajectory file detected.") - return None, None - - input_file_format = self.input_tree.get_keyword("MOTION/PRINT/TRAJECTORY/FORMAT") - file_format = { - "XYZ": "xyz", - "XMOL": "xyz", - "PDB": "pdb-cp2k", - "ATOMIC": "atomic", - }.get(input_file_format) - - if file_format is None: - logger.error("Unsupported trajectory file format '{}'.".format(input_file_format)) - - # Use a custom implementation for the CP2K specific weird formats - if file_format == "pdb-cp2k": - traj_iter = self.csvengine.iread(traj_file, columns=[3, 4, 5], comments=["TITLE", "AUTHOR", "REMARK", "CRYST"], separator="END") - elif file_format == "atomic": - n_atoms = self.get_result_object("particle_number").value - - def atomic_generator(): - conf = [] - i = 0 - for line in traj_file: - line = line.strip() - components = np.array([float(x) for x in line.split()]) - conf.append(components) - i += 1 - if i == n_atoms: - yield np.array(conf) - conf = [] - i = 0 - traj_iter = atomic_generator() - else: - traj_iter = self.atomsengine.iread(traj_file, format=file_format) + # def get_initial_atom_positions_and_unit(self): + # """Returns the starting configuration of the atoms in the system. 
+ # """ + # unit = "angstrom" + + # # Check where the coordinates are specified + # coord_format = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/TOPOLOGY/COORD_FILE_FORMAT") + # if not coord_format: + # coord_format = self.input_tree.get_keyword_default("FORCE_EVAL/SUBSYS/TOPOLOGY/COORD_FILE_FORMAT") + + # # See if the coordinates are provided in the input file + # if coord_format == "OFF": + # logger.debug("Using coordinates from the input file.") + # coords = self.input_tree.get_default_keyword("FORCE_EVAL/SUBSYS/COORD") + # coords = coords.strip().split('\n') + # positions = [] + # for line in coords: + # components = [float(x) for x in line.split()[1:]] + # positions.append(components) + # positions = np.array(positions) + # return positions, unit + + # elif coord_format in ["CP2K", "G96", "XTL", "CRD"]: + # msg = "Tried to read the number of atoms from the initial configuration, but the parser does not yet support the '{}' format that is used by file '{}'.".format(coord_format, self.parser.file_ids["initial_coordinates"]) + # logger.warning(msg) + # else: + # # External file, use AtomsEngine + # init_coord_file = self.parser.get_file_handle("initial_coordinates") + # if coord_format == "XYZ": + # iter_pos = self.atomsengine.iread(init_coord_file, format="xyz") + # if coord_format == "CIF": + # iter_pos = self.atomsengine.iread(init_coord_file, format="cif") + # if coord_format == "PDB": + # iter_pos = self.atomsengine.iread(init_coord_file, format="pdb") + # return next(iter_pos), unit + + # # # Check if the unit cell is multiplied programmatically + # # multiples = self.input_tree.get_keyword("FORCE_EVAL/SUBSYS/TOPOLOGY/MULTIPLE_UNIT_CELL") + # # if not multiples: + # # multiples = self.input_tree.get_keyword_default("FORCE_EVAL/SUBSYS/TOPOLOGY/MULTIPLE_UNIT_CELL") + # # factors = [int(x) for x in multiples.split()] + # # factor = np.prod(np.array(factors)) + + # def get_atom_positions_and_unit(self): + # """Returns the atom positions and unit that were 
calculated during the + # simulation. + # """ + # # Determine the unit + # unit_path = "MOTION/PRINT/TRAJECTORY/UNIT" + # unit = self.input_tree.get_keyword(unit_path) + # # unit = unit.lower() + # unit = CP2KInput.decode_cp2k_unit(unit) + + # # Read the trajectory + # traj_file = self.get_file_handle("trajectory", show_warning=False) + # if not traj_file: + # logger.debug("No trajectory file detected.") + # return None, None + + # input_file_format = self.input_tree.get_keyword("MOTION/PRINT/TRAJECTORY/FORMAT") + # file_format = { + # "XYZ": "xyz", + # "XMOL": "xyz", + # "PDB": "pdb-cp2k", + # "ATOMIC": "atomic", + # }.get(input_file_format) + + # if file_format is None: + # logger.error("Unsupported trajectory file format '{}'.".format(input_file_format)) + + # # Use a custom implementation for the CP2K specific weird formats + # if file_format == "pdb-cp2k": + # traj_iter = self.csvengine.iread(traj_file, columns=[3, 4, 5], comments=["TITLE", "AUTHOR", "REMARK", "CRYST"], separator="END") + # elif file_format == "atomic": + # n_atoms = self.get_result_object("particle_number").value + + # def atomic_generator(): + # conf = [] + # i = 0 + # for line in traj_file: + # line = line.strip() + # components = np.array([float(x) for x in line.split()]) + # conf.append(components) + # i += 1 + # if i == n_atoms: + # yield np.array(conf) + # conf = [] + # i = 0 + # traj_iter = atomic_generator() + # else: + # traj_iter = self.atomsengine.iread(traj_file, format=file_format) - # Return the iterator and unit - return (traj_iter, unit) + # # Return the iterator and unit + # return (traj_iter, unit) - def get_functionals(self): - """Used to search the input file for a functional definition - """ - # First try to look at the shortcut - xc_shortcut = self.input_tree.get_parameter("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL") - if xc_shortcut is not None and xc_shortcut != "NONE" and xc_shortcut != "NO_SHORTCUT": - logger.debug("Shortcut defined for XC_FUNCTIONAL") - - # If PBE, check 
version - if xc_shortcut == "PBE": - pbe_version = self.input_tree.get_keyword("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL/PBE/PARAMETRIZATION") - result.value = { - 'ORIG': "GGA_X_PBE", - 'PBESOL': "GGA_X_PBE_SOL", - 'REVPBE': "GGA_X_PBE_R", - }.get(pbe_version, "GGA_X_PBE") - return result - - result.value = { - 'B3LYP': "HYB_GGA_XC_B3LYP", - 'BEEFVDW': None, - 'BLYP': "GGA_C_LYP_GGA_X_B88", - 'BP': None, - 'HCTH120': None, - 'OLYP': None, - 'LDA': "LDA_XC_TETER93", - 'PADE': "LDA_XC_TETER93", - 'PBE0': None, - 'TPSS': None, - }.get(xc_shortcut, None) - return result - else: - logger.debug("No shortcut defined for XC_FUNCTIONAL. Looking into subsections.") - - # Look at the subsections and determine what part have been activated - - # Becke88 - xc_components = [] - becke_88 = self.input_tree.get_parameter("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL/BECKE88") - if becke_88 == "TRUE": - xc_components.append("GGA_X_B88") - - # Becke 97 - becke_97 = self.input_tree.get_parameter("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL/BECKE97") - if becke_97 == "TRUE": - becke_97_param = self.input_tree.get_keyword("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL/BECKE97/PARAMETRIZATION") - becke_97_result = { - 'B97GRIMME': None, - 'B97_GRIMME': None, - 'ORIG': "GGA_XC_B97", - 'WB97X-V': None, - }.get(becke_97_param, None) - if becke_97_result is not None: - xc_components.append(becke_97_result) - - # Return an alphabetically sorted and joined list of the xc components - result.value = "_".join(sorted(xc_components)) - return result + # def get_functionals(self): + # """Used to search the input file for a functional definition + # """ + # # First try to look at the shortcut + # xc_shortcut = self.input_tree.get_parameter("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL") + # if xc_shortcut is not None and xc_shortcut != "NONE" and xc_shortcut != "NO_SHORTCUT": + # logger.debug("Shortcut defined for XC_FUNCTIONAL") + + # # If PBE, check version + # if xc_shortcut == "PBE": + # pbe_version = 
self.input_tree.get_keyword("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL/PBE/PARAMETRIZATION") + # result.value = { + # 'ORIG': "GGA_X_PBE", + # 'PBESOL': "GGA_X_PBE_SOL", + # 'REVPBE': "GGA_X_PBE_R", + # }.get(pbe_version, "GGA_X_PBE") + # return result + + # result.value = { + # 'B3LYP': "HYB_GGA_XC_B3LYP", + # 'BEEFVDW': None, + # 'BLYP': "GGA_C_LYP_GGA_X_B88", + # 'BP': None, + # 'HCTH120': None, + # 'OLYP': None, + # 'LDA': "LDA_XC_TETER93", + # 'PADE': "LDA_XC_TETER93", + # 'PBE0': None, + # 'TPSS': None, + # }.get(xc_shortcut, None) + # return result + # else: + # logger.debug("No shortcut defined for XC_FUNCTIONAL. Looking into subsections.") + + # # Look at the subsections and determine what part have been activated + + # # Becke88 + # xc_components = [] + # becke_88 = self.input_tree.get_parameter("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL/BECKE88") + # if becke_88 == "TRUE": + # xc_components.append("GGA_X_B88") + + # # Becke 97 + # becke_97 = self.input_tree.get_parameter("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL/BECKE97") + # if becke_97 == "TRUE": + # becke_97_param = self.input_tree.get_keyword("FORCE_EVAL/DFT/XC/XC_FUNCTIONAL/BECKE97/PARAMETRIZATION") + # becke_97_result = { + # 'B97GRIMME': None, + # 'B97_GRIMME': None, + # 'ORIG': "GGA_XC_B97", + # 'WB97X-V': None, + # }.get(becke_97_param, None) + # if becke_97_result is not None: + # xc_components.append(becke_97_result) + + # # Return an alphabetically sorted and joined list of the xc components + # result.value = "_".join(sorted(xc_components)) + # return result # #=============================================================================== # class CP2K_262_Implementation(CP2KImplementation): diff --git a/parser/parser-cp2k/cp2kparser/parsing/inputparsing.py b/parser/parsercp2k/parsing/inputparsing.py similarity index 100% rename from parser/parser-cp2k/cp2kparser/parsing/inputparsing.py rename to parser/parsercp2k/parsing/inputparsing.py diff --git a/parser/parser-cp2k/cp2kparser/parsing/outputparsing.py 
b/parser/parsercp2k/parsing/outputparsing.py similarity index 98% rename from parser/parser-cp2k/cp2kparser/parsing/outputparsing.py rename to parser/parsercp2k/parsing/outputparsing.py index 6408c1d..6ae0058 100644 --- a/parser/parser-cp2k/cp2kparser/parsing/outputparsing.py +++ b/parser/parsercp2k/parsing/outputparsing.py @@ -249,8 +249,9 @@ class CP2KOutputParser262(object): def onClose_cp2k_section_atom_position(self, backend, gIndex, section): """Get the initial atomic positions from cp2kparser. """ - positions, unit = self.cp2kparser.get_initial_atom_positions_and_unit() - backend.addArrayValues("atom_position", positions) + pass + # positions, unit = self.cp2kparser.get_initial_atom_positions_and_unit() + # backend.addArrayValues("atom_position", positions) def onClose_cp2k_section_md_coordinate_atom(self, backend, gIndex, section): """Given the string with the coordinate components for one atom, make it diff --git a/parser/parsercp2k/setup_paths.py b/parser/parsercp2k/setup_paths.py new file mode 100644 index 0000000..89200e0 --- /dev/null +++ b/parser/parsercp2k/setup_paths.py @@ -0,0 +1,8 @@ +import sys +import os +baseDir = os.path.dirname(os.path.abspath(__file__)) +commonDir = os.path.normpath(os.path.join(baseDir, "../../../../python-common/common/python")) + +if os.path.exists(commonDir): + if not commonDir in sys.path: + sys.path.insert(0, commonDir) diff --git a/parser/parser-cp2k/cp2kparser/utils/__init__.py b/parser/parsercp2k/utils/__init__.py similarity index 100% rename from parser/parser-cp2k/cp2kparser/utils/__init__.py rename to parser/parsercp2k/utils/__init__.py diff --git a/parser/parser-cp2k/cp2kparser/utils/baseclasses.py b/parser/parsercp2k/utils/baseclasses.py similarity index 86% rename from parser/parser-cp2k/cp2kparser/utils/baseclasses.py rename to parser/parsercp2k/utils/baseclasses.py index c23d7f9..c6a5450 100644 --- a/parser/parser-cp2k/cp2kparser/utils/baseclasses.py +++ b/parser/parsercp2k/utils/baseclasses.py @@ -1,21 
+1,20 @@ import os import sys -import json import logging import StringIO -import argparse from abc import ABCMeta, abstractmethod +from parsercp2k.parsing.outputparsing import * from nomadcore.simple_parser import SimpleParserBuilder, defaultParseFile, extractOnCloseTriggers from nomadcore.local_meta_info import loadJsonFile, InfoKindEl -from nomadcore.parser_backend import JsonParseEventsWriterBackend from nomadcore.caching_backend import CachingLevel, ActiveBackend -from nomadcore.parse_streamed_dicts import ParseStreamedDicts +from nomadcore.simple_parser import mainFunction logger = logging.getLogger(__name__) #=============================================================================== class Parser(object): - """ + """A base class for nomad parsers. + Attributes: self.implementation: an object that actually does the parsing and is setup by this class based on the given contents. @@ -45,21 +44,22 @@ class Parser(object): contents = [contents] # Figure out all the files from the contents - files = set() - for content in contents: - if os.path.isdir(content): - dir_files = set() - for filename in os.listdir(content): - dir_files.add(os.path.join(content, filename)) - files |= dir_files - elif os.path.isfile(content): - files.add(content) - else: - logger.error("The string '{}' is not a valid path.".format(content)) + if contents: + files = set() + for content in contents: + if os.path.isdir(content): + dir_files = set() + for filename in os.listdir(content): + dir_files.add(os.path.join(content, filename)) + files |= dir_files + elif os.path.isfile(content): + files.add(content) + else: + logger.error("The string '{}' is not a valid path.".format(content)) - # Filter the files leaving only the parseable ones. Each parser can - # specify which files are of interest or to include them all. - self.parser_context.files = self.search_parseable_files(files) + # Filter the files leaving only the parseable ones. 
Each parser can + # specify which files are of interest or to include them all. + self.parser_context.files = self.search_parseable_files(files) @abstractmethod def setup(self): @@ -95,40 +95,41 @@ class Parser(object): self.setup() if not self.implementation: logger.error("No parser implementation has been setup.") + + # Write the starting bracket + self.backend.fileOut.write("[") + self.implementation.parse() + # Write the ending bracket + self.backend.fileOut.write("]\n") + def scala_main_function(self): """This function gets called when the scala calls for a parser. """ - # Parse the command line options - parser = argparse.ArgumentParser() - parser.add_argument('--specialize', help='Provide specialization information as the first JSON dictionary on stdin.') - parser.add_argument('--stream', type=str, nargs='+', help='Expects the files to parse via JSON dictionary on stdin.') - parser.add_argument('mainFilePath', type=str, nargs='+', help='Path to the main file.') - args = parser.parse_args() + + # Get the outputparser class + outputparser = globals()["CP2KOutputParser{}".format("262")](None, None) # Setup the metainfos metaInfoPath = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../../../nomad-meta-info/meta_info/nomad_meta_info/{}".format(self.get_metainfo_filename()))) metaInfoEnv, warnings = loadJsonFile(filePath=metaInfoPath, dependencyLoader=None, extraArgsHandling=InfoKindEl.ADD_EXTRA_ARGS, uri=None) - # Setup the JSON backend - backend = JsonParseEventsWriterBackend(metaInfoEnv, sys.stdout) + # Parser info + parserInfo = {'name': 'cp2k-parser', 'version': '1.0'} - # Setup the contents - contents = [args.mainFilePath] - # contents.extend(auxiliary_files) + # Adjust caching of metadata + cachingLevelForMetaName = outputparser.cachingLevelForMetaName - # Setup the specilalization info - dictReader = ParseStreamedDicts(sys.stdin) - if args.specialize: - specializationInfo = dictReader.readNextDict() - if specializationInfo is 
None or specializationInfo.get("type", "") != "nomad_parser_specialization_1_0": - raise Exception("expected a nomad_parser_specialization_1_0 as first dictionary, got " + json.dumps(specializationInfo)) - metainfo_to_keep = specializationInfo.get("metaInfoToKeep") + # Supercontext is the object where the callback functions for + # section closing are found + superContext = outputparser - # Parse - self.initialize(contents, metainfo_to_keep, backend) - self.parse() + # Main file description is the SimpleParser tree + mainFileDescription = outputparser.outputstructure + + # Use the main function from nomadcore + mainFunction(mainFileDescription, metaInfoEnv, parserInfo, superContext=superContext, cachingLevelForMetaName=cachingLevelForMetaName, onClose={}) #=============================================================================== diff --git a/parser/parser-cp2k/cp2kparser/utils/logconfig.py b/parser/parsercp2k/utils/logconfig.py similarity index 100% rename from parser/parser-cp2k/cp2kparser/utils/logconfig.py rename to parser/parsercp2k/utils/logconfig.py diff --git a/parser/parser-cp2k/cp2kparser/utils/testing.py b/parser/parsercp2k/utils/testing.py similarity index 100% rename from parser/parser-cp2k/cp2kparser/utils/testing.py rename to parser/parsercp2k/utils/testing.py diff --git a/parser/parser-cp2k/setup.py b/parser/setup.py similarity index 91% rename from parser/parser-cp2k/setup.py rename to parser/setup.py index 6f69bd8..4282820 100644 --- a/parser/parser-cp2k/setup.py +++ b/parser/setup.py @@ -5,7 +5,7 @@ from setuptools import setup, find_packages def main(): # Start package setup setup( - name="cp2kparser", + name="parsercp2k", version="0.1", include_package_data=True, package_data={ @@ -15,7 +15,7 @@ def main(): author="Lauri Himanen", author_email="lauri.himanen@gmail.com", license="GPL3", - packages=["cp2kparser"], + packages=["parsercp2k"], install_requires=[ 'pint', 'numpy', -- GitLab