diff --git a/parser/parser-cp2k/cp2kparser/generic/configurationreading.py b/parser/parser-cp2k/cp2kparser/generic/configurationreading.py
deleted file mode 100644
index c5c5a2ce1daebe2780a927786bc32ec2025dbc1b..0000000000000000000000000000000000000000
--- a/parser/parser-cp2k/cp2kparser/generic/configurationreading.py
+++ /dev/null
@@ -1,86 +0,0 @@
-import ase.io
-import ase.io.formats
-import mdtraj as md
-import mdtraj.formats
-import numpy as np
-import logging
-logger = logging.getLogger("nomad")
-
-
-#===============================================================================
-def iread(filename, file_format=None):
-    """Generator function that is used to read an atomic configuration file (MD
-    trajectory, geometry optimization, static snapshot) from a file one frame
-    at a time. Only the xyz positions are returned from the file, and no unit
-    conversion is done, so you have to be careful with units.
-
-    By using a generator pattern we can avoid loading the entire trajectory
-    file into memory. This function will instead load a chunk of the file into
-    memory (with MDTraj you can decide the chunk size, with ASE it seems to
-    always be one frame), and serve individual files from that chunk. Once the
-    frames in one chunk are iterated, the chunk will be garbage collected and
-    memory is freed.
-
-    Args:
-        filename: String for the file path.
-        file_format: String for the file format. If not given the format is
-            automatically detected from the extension.
-
-    Yields:
-        numpy array containing the atomic positions in one frame.
-
-    """
-    # If file format is not explicitly stated, determine the format from the
-    # filename
-    if file_format is None:
-        file_format = filename.split(".")[-1]
-
-    # Try to open the file with MDTraj first. With a brief inspection it seems
-    # that MDTraj is better performance wise, because it can iteratively load a
-    # "chunk" of frames, and still serve the individual frames one by one. ASE
-    # on the other hand will iteratively read frames one by one (unnecessary
-    # IO).
-    mdtraj_chunk = 100 # How many frames MDTraj will load at once
-    mdtraj_failed = False
-
-    # Must use the low level MDTraj API to open files without topology.
-    class_format_map = {
-        "dcd": mdtraj.formats.DCDTrajectoryFile,
-        "xyz": mdtraj.formats.XYZTrajectoryFile,
-        "pdb": mdtraj.formats.PDBTrajectoryFile,
-    }
-    traj_class = class_format_map.get(file_format)
-    if traj_class is not None:
-        try:
-            with traj_class(filename, mode="r") as f:
-                empty = False
-                while not empty:
-                    data = f.read(mdtraj_chunk)
-                    if isinstance(data, tuple):
-                        positions = data[0]
-                    else:
-                        positions = data
-                    if len(positions) == 0:
-                        empty = True
-                    else:
-                        for pos in positions:
-                            yield pos
-        except IOError:
-            logger.warning("MDTraj could not read the file '{}' with format '{}'. The contents might be malformed or wrong format used.".format(filename, file_format))
-            return
-    else:
-        mdtraj_failed = True
-
-    # If MDTraj didn't support the format, try ASE instead
-    if mdtraj_failed:
-        try:
-            io = ase.io.formats.get_ioformat(file_format)
-        except ValueError:
-            logger.error("MDTraj could not read the file '{}' with format '{}'. If MDTraj is supposed to read this format, the contents might be malformed.".format(filename, file_format))
-            return
-        else:
-            # Return the positions in a numpy array instead of an ASE Atoms object
-            generator = ase.io.iread(filename, format=file_format)
-            for atoms in generator:
-                pos = atoms.positions
-                yield pos
diff --git a/parser/parser-cp2k/cp2kparser/generic/csvparsing.py b/parser/parser-cp2k/cp2kparser/generic/csvparsing.py
deleted file mode 100644
index 18efa5d68d3822da92a55ab6e5d008fa2985b9c5..0000000000000000000000000000000000000000
--- a/parser/parser-cp2k/cp2kparser/generic/csvparsing.py
+++ /dev/null
@@ -1,138 +0,0 @@
-import numpy as np
-import logging
-logger = logging.getLogger(__name__)
-from io import StringIO
-import re
-
-
-#===============================================================================
-def iread(filepath, columns, delimiter=r"\s+", comments=r"#", start=None, end=None):
-    """Used to iterate a CSV-like file. If a separator is provided the file
-    is iterated one configuration at a time. Only keeps one configuration
-    of the file in memory. If no separator is given, the whole file will be
-    handled.
-
-    The contents are separated into configurations whenever the separator
-    regex is encountered on a line.
-
-    Args:
-        filepath: Path to the CSV like file to be processed.
-        columns: List of integers indicating the columns of interest in the CSV file.
-        start: A regex that is used to indicate the start of a new configuration.
-        end: A regex that is used to indicate the end of a configuration.
-        comments: A regex that is used identify comments in the file that are ignored.
-    """
-
-    def split_line(line):
-        """Chop off comments, strip, and split at delimiter.
-        """
-        if line.isspace():
-            return None
-        if comments:
-            line = compiled_comments.split(line, maxsplit=1)[0]
-        line = line.strip('\r\n ')
-        if line:
-            return compiled_delimiter.split(line)
-        else:
-            return None
-
-    def is_end(line):
-        """Check if the given line matches the separator pattern.
-        Separators are used to split a file into multiple configurations.
-        """
-        if end:
-            return compiled_end.search(line)
-        return False
-
-    def is_start(line):
-        """Check if the given line matches the separator pattern.
-        Separators are used to split a file into multiple configurations.
-        """
-        if start:
-            return compiled_start.search(line)
-        return False
-
-    # Precompile the different regexs before looping
-    compiled_delimiter = re.compile(delimiter)
-    if comments:
-        comments = (re.escape(comment) for comment in comments)
-        compiled_comments = re.compile('|'.join(comments))
-    if end:
-        compiled_end = re.compile(end)
-    if start:
-        compiled_start = re.compile(start)
-
-    # Columns as list
-    if columns is not None:
-        columns = list(columns)
-
-    # Start iterating
-    configuration = []
-    started = False
-
-    # If no starting and ending condition are provided, read configuration by line
-    if start is None and end is None:
-        with open(filepath, "r") as f:
-            for line in f: # This actually reads line by line and only keeps the current line in memory
-                # Ignore comments, separate by delimiter
-                vals = split_line(line)
-                line_forces = []
-                if vals:
-                    for column in columns:
-                        try:
-                            value = vals[column]
-                        except IndexError:
-                            logger.warning("The given index '{}' could not be found on the line '{}'. The given delimiter or index could be wrong.".format(column, line))
-                            return
-                        try:
-                            value = float(value)
-                        except ValueError:
-                            logger.warning("Could not cast value '{}' to float. Currently only floating point values are accepted".format(value))
-                            return
-                        else:
-                            line_forces.append(value)
-                    yield np.array(line_forces)
-
-    # If starting and ending condition are provided, after starting condition
-    # is detected, add the values from lines to a new array that is returned
-    # when the end condition is met
-    elif start is not None and end is not None:
-        with open(filepath, "r") as f:
-            for line in f: # This actually reads line by line and only keeps the current line in memory
-
-                # If a start regex is provided, use it to detect the start of a configuration
-                if is_start(line):
-                    started = True
-                    continue
-
-                # If separator encountered, yield the stored configuration
-                if is_end(line):
-                    started = False
-                    if configuration:
-                        yield np.array(configuration)
-                        configuration = []
-
-                elif start is not None and started:
-                    # Ignore comments, separate by delimiter
-                    vals = split_line(line)
-                    line_forces = []
-                    if vals:
-                        for column in columns:
-                            try:
-                                value = vals[column]
-                            except IndexError:
-                                logger.warning("The given index '{}' could not be found on the line '{}'. The given delimiter or index could be wrong.".format(column, line))
-                                return
-                            try:
-                                value = float(value)
-                            except ValueError:
-                                logger.warning("Could not cast value '{}' to float. Currently only floating point values are accepted".format(value))
-                                return
-                            else:
-                                line_forces.append(value)
-                        configuration.append(line_forces)
-
-            # The last configuration is yielded even if separator is not present at
-            # the end of file or is not given at all
-            if configuration:
-                yield np.array(configuration)
diff --git a/parser/parser-cp2k/cp2kparser/versions/cp2k262/geooptparser.py b/parser/parser-cp2k/cp2kparser/versions/cp2k262/geooptparser.py
index dc39aa72ec8dfb54505b309e0aa7aa16555a05a8..72f5f5b0be3940f4c8336713589a0b785717f85d 100644
--- a/parser/parser-cp2k/cp2kparser/versions/cp2k262/geooptparser.py
+++ b/parser/parser-cp2k/cp2kparser/versions/cp2k262/geooptparser.py
@@ -4,9 +4,9 @@ from builtins import next
 from builtins import range
 from nomadcore.simple_parser import SimpleMatcher as SM
 from nomadcore.baseclasses import MainHierarchicalParser
+import nomadcore.configurationreading
+import nomadcore.csvparsing
 from .commonparser import CP2KCommonParser
-import cp2kparser.generic.configurationreading
-import cp2kparser.generic.csvparsing
 from nomadcore.caching_backend import CachingLevel
 import logging
 logger = logging.getLogger("nomad")
@@ -258,10 +258,10 @@ class CP2KGeoOptParser(MainHierarchicalParser):
 
         # Use special parsing for CP2K pdb files because they don't follow the proper syntax
         if traj_format == "PDB":
-            self.traj_iterator = cp2kparser.generic.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
+            self.traj_iterator = nomadcore.csvparsing.iread(traj_file, columns=[3, 4, 5], start="CRYST", end="END")
         else:
             try:
-                self.traj_iterator = cp2kparser.generic.configurationreading.iread(traj_file)
+                self.traj_iterator = nomadcore.configurationreading.iread(traj_file)
             except ValueError:
                 pass
 
diff --git a/parser/parser-cp2k/cp2kparser/versions/cp2k262/mdparser.py b/parser/parser-cp2k/cp2kparser/versions/cp2k262/mdparser.py
index 82acd977ca75d325ab754e28a3ebab03a925f585..f810c15ef213e9ecaf42e17b5a8b39fa3abdc7f5 100644
--- a/parser/parser-cp2k/cp2kparser/versions/cp2k262/mdparser.py
+++ b/parser/parser-cp2k/cp2kparser/versions/cp2k262/mdparser.py
@@ -4,9 +4,9 @@ from builtins import range
 import numpy as np
 from nomadcore.simple_parser import SimpleMatcher as SM
 from nomadcore.baseclasses import MainHierarchicalParser
+import nomadcore.configurationreading
+import nomadcore.csvparsing
 from .commonparser import CP2KCommonParser
-import cp2kparser.generic.configurationreading
-import cp2kparser.generic.csvparsing
 from nomadcore.caching_backend import CachingLevel
 from nomadcore.unit_conversion.unit_conversion import convert_unit
 import logging
@@ -192,10 +192,10 @@ class CP2KMDParser(MainHierarchicalParser):
 
         # Use special parsing for CP2K pdb files because they don't follow the proper syntax
         if traj_format == "PDB":
-            self.traj_iterator = cp2kparser.generic.csvparsing.iread(coord_filepath, columns=[3, 4, 5], start="CRYST", end="END")
+            self.traj_iterator = nomadcore.csvparsing.iread(coord_filepath, columns=[3, 4, 5], start="CRYST", end="END")
         else:
             try:
-                self.traj_iterator = cp2kparser.generic.configurationreading.iread(coord_filepath)
+                self.traj_iterator = nomadcore.configurationreading.iread(coord_filepath)
             except ValueError:
                 pass
 
@@ -203,17 +203,17 @@ class CP2KMDParser(MainHierarchicalParser):
         vel_format = self.cache_service["velocity_format"]
         if vel_format is not None and vel_filepath is not None:
             try:
-                self.vel_iterator = cp2kparser.generic.configurationreading.iread(vel_filepath)
+                self.vel_iterator = nomadcore.configurationreading.iread(vel_filepath)
             except ValueError:
                 pass
 
         # Setup energy file iterator
         if energies_filepath is not None:
-            self.energy_iterator = cp2kparser.generic.csvparsing.iread(energies_filepath, columns=[0, 1, 2, 3, 4, 5, 6], comments="#")
+            self.energy_iterator = nomadcore.csvparsing.iread(energies_filepath, columns=[0, 1, 2, 3, 4, 5, 6], comments="#")
 
         # Setup cell file iterator
         if cell_filepath is not None:
-            self.cell_iterator = cp2kparser.generic.csvparsing.iread(cell_filepath, columns=[2, 3, 4, 5, 6, 7, 8, 9, 10], comments="#")
+            self.cell_iterator = nomadcore.csvparsing.iread(cell_filepath, columns=[2, 3, 4, 5, 6, 7, 8, 9, 10], comments="#")
 
     def onClose_x_cp2k_section_md(self, backend, gIndex, section):
 
@@ -290,7 +290,7 @@ class CP2KMDParser(MainHierarchicalParser):
                 try:
                     pos = next(self.traj_iterator)
                 except StopIteration:
-                    logger.error("Could not get the next geometries from an external file. It seems that the number of optimization steps in the CP2K outpufile doesn't match the number of steps found in the external trajectory file.")
+                    logger.error("Could not get the next geometries from an external file. It seems that the number of optimization steps in the CP2K outputfile doesn't match the number of steps found in the external trajectory file.")
                 else:
                     backend.addArrayValues("atom_positions", pos, unit=traj_unit)
 