From f0ce17c07dd0469944589a64699ee52d97f51f76 Mon Sep 17 00:00:00 2001 From: Lauri Himanen <lauri.himanen@aalto.fi> Date: Thu, 7 Jul 2016 15:01:05 +0300 Subject: [PATCH] Single point parsing basics done, trying to get the scala integration working. --- .../versions/cpmd41/commonparser.py | 7 + .../cpmdparser/versions/cpmd41/mainparser.py | 93 +++++++++-- .../eu/nomad_lab/parsers/CpmdParser.scala | 25 +-- .../eu/nomad_lab/parsers/CpmdParserSpec.scala | 6 + test/unittests/cpmd_4.1/run_tests.py | 146 ++++++------------ 5 files changed, 159 insertions(+), 118 deletions(-) diff --git a/parser/parser-cpmd/cpmdparser/versions/cpmd41/commonparser.py b/parser/parser-cpmd/cpmdparser/versions/cpmd41/commonparser.py index f168803..b398d1d 100644 --- a/parser/parser-cpmd/cpmdparser/versions/cpmd41/commonparser.py +++ b/parser/parser-cpmd/cpmdparser/versions/cpmd41/commonparser.py @@ -16,3 +16,10 @@ class CPMDCommonParser(CommonParser): def onClose_section_run(self, backend, gIndex, section): backend.addValue("program_name", "CPMD") backend.addValue("program_basis_set_type", "plane waves") + + def onClose_section_method(self, backend, gIndex, section): + backend.addValue("electronic_structure_method", "DFT") + basis_id = backend.openSection("section_method_basis_set") + backend.addValue("method_basis_set_kind", "wavefunction") + backend.addValue("mapping_section_method_basis_set_cell_associated", 0) + backend.closeSection("section_method_basis_set", basis_id) diff --git a/parser/parser-cpmd/cpmdparser/versions/cpmd41/mainparser.py b/parser/parser-cpmd/cpmdparser/versions/cpmd41/mainparser.py index e0a40df..dbc4e36 100644 --- a/parser/parser-cpmd/cpmdparser/versions/cpmd41/mainparser.py +++ b/parser/parser-cpmd/cpmdparser/versions/cpmd41/mainparser.py @@ -22,6 +22,7 @@ class CPMDMainParser(MainHierarchicalParser): """ super(CPMDMainParser, self).__init__(file_path, parser_context) self.setup_common_matcher(CPMDCommonParser(parser_context)) + self.n_scf_iterations = 0 #======================================================================= # Cache levels @@ -54,7 +55,7 @@ class CPMDMainParser(MainHierarchicalParser): SM( " PATH TO THE RESTART FILES:\s+{}".format(self.regexs.regex_eol)), SM( " GRAM-SCHMIDT ORTHOGONALIZATION"), SM( " MAXIMUM NUMBER OF STEPS:\s+{} STEPS".format(self.regexs.regex_i)), - SM( " MAXIMUM NUMBER OF ITERATIONS FOR SC:\s+{} STEPS".format(self.regexs.regex_i)), + SM( " MAXIMUM NUMBER OF ITERATIONS FOR SC:\s+(?P<scf_max_iteration>{}) STEPS".format(self.regexs.regex_i)), SM( " PRINT INTERMEDIATE RESULTS EVERY\s+{} STEPS".format(self.regexs.regex_i)), SM( " STORE INTERMEDIATE RESULTS EVERY\s+{} STEPS".format(self.regexs.regex_i)), SM( " NUMBER OF DISTINCT RESTART FILES:\s+{}".format(self.regexs.regex_i)), @@ -62,7 +63,7 @@ class CPMDMainParser(MainHierarchicalParser): SM( " FICTITIOUS ELECTRON MASS:\s+{}".format(self.regexs.regex_f)), SM( " TIME STEP FOR ELECTRONS:\s+{}".format(self.regexs.regex_f)), SM( " TIME STEP FOR IONS:\s+{}".format(self.regexs.regex_f)), - SM( " CONVERGENCE CRITERIA FOR WAVEFUNCTION OPTIMIZATION:\s+{}".format(self.regexs.regex_f)), + SM( " CONVERGENCE CRITERIA FOR WAVEFUNCTION OPTIMIZATION:\s+(?P<scf_threshold_energy_change__hartree>{})".format(self.regexs.regex_f)), SM( " WAVEFUNCTION OPTIMIZATION BY PRECONDITIONED DIIS"), SM( " THRESHOLD FOR THE WF-HESSIAN IS\s+{}".format(self.regexs.regex_f)), SM( " MAXIMUM NUMBER OF VECTORS RETAINED FOR DIIS:\s+{}".format(self.regexs.regex_i)), @@ -81,7 +82,7 @@ class CPMDMainParser(MainHierarchicalParser): # SM( " PROGRAM CPMD STARTED AT: (?P<x_cpmd_start_datetime>{})".format(self.regexs.regex_eol)), ] ), - SM( " ***************************** ATOMS ****************************".replace("*", "\*"), + SM( re.escape(" ***************************** ATOMS ****************************"), sections=["x_cpmd_section_system_information"], subMatchers=[ SM( " NR TYPE X(BOHR) Y(BOHR) Z(BOHR) MBL".replace("(", "\(").replace(")", "\)"), @@ -90,13 +91,19 @@ class CPMDMainParser(MainHierarchicalParser): SM( " CHARGE:\s+(?P<total_charge>{})".format(self.regexs.regex_i)), ] ), - SM( " \| Pseudopotential Report", + SM( re.escape(" | Pseudopotential Report"), sections=["x_cpmd_section_pseudopotential_information"], + ), + SM( re.escape(" * ATOM MASS RAGGIO NLCC PSEUDOPOTENTIAL *"), + sections=["x_cpmd_section_atom_kinds"], subMatchers=[ - # SM( " PROGRAM CPMD STARTED AT: (?P<x_cpmd_start_datetime>{})".format(self.regexs.regex_eol)), + SM( " \*\s+(?P<x_cpmd_atom_kind_label>{0})\s+(?P<x_cpmd_atom_kind_mass>{1})\s+(?P<x_cpmd_atom_kind_raggio>{1})\s+(?P<x_cpmd_atom_kind_nlcc>{0})\s+(?P<x_cpmd_atom_kind_pseudopotential_l>{0})\s+(?P<x_cpmd_atom_kind_pseudopotential_type>{0})\s+\*".format(self.regexs.regex_word, self.regexs.regex_f), + sections=["x_cpmd_section_atom_kind"], + repeats=True, + ), ] ), - SM( " ************************** SUPERCELL ***************************".replace("*", "\*"), + SM( re.escape(" ************************** SUPERCELL ***************************"), sections=["x_cpmd_section_supercell"], subMatchers=[ SM( " SYMMETRY:\s+(?P<x_cpmd_cell_symmetry>{})".format(self.regexs.regex_eol)), @@ -126,13 +133,13 @@ class CPMDMainParser(MainHierarchicalParser): SM( " NFI GEMAX CNORM ETOT DETOT TCPU", sections=["x_cpmd_section_scf"], subMatchers=[ - SM( "\s+{0}\s+{1}\s+{1}\s+{1}\s+{1}\s+{1}".format(self.regexs.regex_i, self.regexs.regex_f), - sections=["section_scf_iteration"], + SM( "\s+(?P<x_cpmd_scf_nfi>{0})\s+(?P<x_cpmd_scf_gemax>{1})\s+(?P<x_cpmd_scf_cnorm>{1})\s+(?P<x_cpmd_scf_etot__hartree>{1})\s+(?P<x_cpmd_scf_detot__hartree>{1})\s+(?P<x_cpmd_scf_tcpu__s>{1})".format(self.regexs.regex_i, self.regexs.regex_f), + sections=["x_cpmd_section_scf_iteration"], repeats=True, ), ] ), - SM( " * FINAL RESULTS *".replace("*", "\*"), + SM( re.escape(" * FINAL RESULTS *"), sections=["x_cpmd_section_final_results"], subMatchers=[ SM( " ATOM COORDINATES GRADIENTS \(-FORCES\)", @@ -143,7 +150,7 @@ class CPMDMainParser(MainHierarchicalParser): SM( " \(X\) EXCHANGE-CORRELATION ENERGY =\s+(?P<energy_XC_potential__hartree>{}) A\.U\.".format(self.regexs.regex_f)), ] ), - SM( " * TIMING *".replace("*", "\*"), + SM( re.escape(" * TIMING *"), sections=["x_cpmd_section_timing"], subMatchers=[ ] @@ -196,6 +203,33 @@ class CPMDMainParser(MainHierarchicalParser): backend.addValue("basis_set_planewave_cutoff", si_cutoff) backend.closeSection("section_basis_set_cell_dependent", basis_id) + def onClose_x_cpmd_section_scf_iteration(self, backend, gIndex, section): + # SCF step energy and energy change + scf_id = backend.openSection("section_scf_iteration") + energy = section.get_latest_value("x_cpmd_scf_etot") + backend.addValue("energy_total_scf_iteration", energy) + denergy = section.get_latest_value("x_cpmd_scf_detot") + backend.addValue("energy_change_scf_iteration", denergy) + backend.closeSection("section_scf_iteration", scf_id) + self.n_scf_iterations += 1 + + def onClose_x_cpmd_section_scf(self, backend, gIndex, section): + backend.addValue("number_of_scf_iterations", self.n_scf_iterations) + + def onClose_x_cpmd_section_atom_kind(self, backend, gIndex, section): + # Atomic kinds + label = section.get_latest_value("x_cpmd_atom_kind_label") + number = self.get_atom_number(label) + id_kind = backend.openSection("section_method_atom_kind") + backend.addValue("method_atom_kind_atom_number", number) + backend.addValue("method_atom_kind_label", label) + backend.closeSection("section_method_atom_kind", id_kind) + + def onClose_section_single_configuration_calculation(self, backend, gIndex, section): + # For single point calculations there is only one method and system. + backend.addValue("single_configuration_calculation_to_system_ref", 0) + backend.addValue("single_configuration_to_calculation_method_ref", 0) + #======================================================================= # adHoc def debug(self): @@ -287,3 +321,42 @@ class CPMDMainParser(MainHierarchicalParser): vectorstr = vectorstr.strip().split() vec_array = np.array([float(x) for x in vectorstr]) return vec_array + + def get_atom_number(self, symbol): + """ Returns the atomic number when given the atomic symbol. + + Args: + symbol: atomic symbol as string + + Returns: + The atomic number (number of protons) for the given symbol. + """ + chemical_symbols = [ + 'X', 'H', 'He', 'Li', 'Be', + 'B', 'C', 'N', 'O', 'F', + 'Ne', 'Na', 'Mg', 'Al', 'Si', + 'P', 'S', 'Cl', 'Ar', 'K', + 'Ca', 'Sc', 'Ti', 'V', 'Cr', + 'Mn', 'Fe', 'Co', 'Ni', 'Cu', + 'Zn', 'Ga', 'Ge', 'As', 'Se', + 'Br', 'Kr', 'Rb', 'Sr', 'Y', + 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', + 'Rh', 'Pd', 'Ag', 'Cd', 'In', + 'Sn', 'Sb', 'Te', 'I', 'Xe', + 'Cs', 'Ba', 'La', 'Ce', 'Pr', + 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', + 'Tb', 'Dy', 'Ho', 'Er', 'Tm', + 'Yb', 'Lu', 'Hf', 'Ta', 'W', + 'Re', 'Os', 'Ir', 'Pt', 'Au', + 'Hg', 'Tl', 'Pb', 'Bi', 'Po', + 'At', 'Rn', 'Fr', 'Ra', 'Ac', + 'Th', 'Pa', 'U', 'Np', 'Pu', + 'Am', 'Cm', 'Bk', 'Cf', 'Es', + 'Fm', 'Md', 'No', 'Lr' + ] + + atom_numbers = {} + for Z, name in enumerate(chemical_symbols): + atom_numbers[name] = Z + + return atom_numbers[symbol] diff --git a/src/main/scala/eu/nomad_lab/parsers/CpmdParser.scala b/src/main/scala/eu/nomad_lab/parsers/CpmdParser.scala index e76184e..1952638 100644 --- a/src/main/scala/eu/nomad_lab/parsers/CpmdParser.scala +++ b/src/main/scala/eu/nomad_lab/parsers/CpmdParser.scala @@ -21,15 +21,17 @@ object CpmdParser extends SimpleExternalParserGenerator( )) :: Nil ), mainFileTypes = Seq("text/.*"), - mainFileRe = """ \*\*\*\*\*\* \*\*\*\*\*\* \*\*\*\* \*\*\*\* \*\*\*\*\*\* - \*\*\*\*\*\*\* \*\*\*\*\*\*\* \*\*\*\*\*\*\*\*\*\* \*\*\*\*\*\*\* - \*\*\* \*\* \*\*\* \*\* \*\*\*\* \*\* \*\* \*\*\* - \*\* \*\* \*\*\* \*\* \*\* \*\* \*\* \*\* - \*\* \*\*\*\*\*\*\* \*\* \*\* \*\* \*\* - \*\*\* \*\*\*\*\*\* \*\* \*\* \*\* \*\*\* - \*\*\*\*\*\*\* \*\* \*\* \*\* \*\*\*\*\*\*\* - \*\*\*\*\*\* \*\* \*\* \*\* \*\*\*\*\*\*""".r, - cmd = Seq(DefaultPythonInterpreter.python2Exe(), "${envDir}/parsers/cpmd/parser/parser-cpmd/cpmdparser/scalainterface.py", + mainFileRe = """ \*\*\*\* \*\*\*\* \*\*\*\*\*\* \*\* PROGRAM STARTED AT\s(?<cpmdStartedAt>.*) + \*\*\*\*\* \*\* \*\*\* \*\*\* \*\* PROGRAM STARTED ON\s*.* + \*\* \*\*\*\* \*\*\*\*\*\* PROGRAM STARTED BY .* + \*\*\*\*\* \*\* \*\* \*\* \*\* PROGRAM PROCESS ID .* + \*\*\*\* \*\* \*\*\*\*\*\*\* \*\* PROGRAM STARTED IN .* +(?:\s*\n| \s+.* +)* +(?:\s*CP2K\| version string:\s*(?<cpmdVersionString>.*) +)?(?:\s*CP2K\| source code revision number:\s*(?<cpmdRevision>.*) +)?""".r, + cmd = Seq(DefaultPythonInterpreter.pythonExe(), "${envDir}/parsers/cpmd/parser/parser-cpmd/cpmdparser/scalainterface.py", "${mainFilePath}"), cmdCwd = "${mainFilePath}/..", resList = Seq( @@ -37,16 +39,15 @@ object CpmdParser extends SimpleExternalParserGenerator( "parser-cpmd/cpmdparser/setup_paths.py", "parser-cpmd/cpmdparser/parser.py", "parser-cpmd/cpmdparser/scalainterface.py", - "parser-cpmd/cpmdparser/generic/__init__.py", "parser-cpmd/cpmdparser/versions/__init__.py", "parser-cpmd/cpmdparser/versions/cpmd41/__init__.py", - "parser-cpmd/cpmdparser/versions/cpmd41/commonparser.py", "parser-cpmd/cpmdparser/versions/cpmd41/mainparser.py", "parser-cpmd/cpmdparser/versions/cpmd41/inputparser.py", + "parser-cpmd/cpmdparser/versions/cpmd41/commonparser.py", "nomad_meta_info/public.nomadmetainfo.json", "nomad_meta_info/common.nomadmetainfo.json", "nomad_meta_info/meta_types.nomadmetainfo.json", - "nomad_meta_info/cpmd.nomadmetainfo.json", + "nomad_meta_info/cpmd.nomadmetainfo.json" ) ++ DefaultPythonInterpreter.commonFiles(), dirMap = Map( "parser-cpmd" -> "parsers/cpmd/parser/parser-cpmd", diff --git a/src/test/scala/eu/nomad_lab/parsers/CpmdParserSpec.scala b/src/test/scala/eu/nomad_lab/parsers/CpmdParserSpec.scala index 472e92b..668e64d 100644 --- a/src/test/scala/eu/nomad_lab/parsers/CpmdParserSpec.scala +++ b/src/test/scala/eu/nomad_lab/parsers/CpmdParserSpec.scala @@ -3,6 +3,12 @@ package eu.nomad_lab.parsers import org.specs2.mutable.Specification object CpmdParserSpec extends Specification { + "CpmdParserTest" >> { + "test with json-events" >> { + ParserRun.parse(CpmdParser, "parsers/cpmd/test/examples/single_point/output.out", "json-events") must_== ParseResult.ParseSuccess + } + } + "test energy_force with json" >> { ParserRun.parse(CpmdParser, "parsers/cpmd/test/examples/single_point/output.out", "json") must_== ParseResult.ParseSuccess } diff --git a/test/unittests/cpmd_4.1/run_tests.py b/test/unittests/cpmd_4.1/run_tests.py index 4d23902..b61ee2f 100644 --- a/test/unittests/cpmd_4.1/run_tests.py +++ b/test/unittests/cpmd_4.1/run_tests.py @@ -160,115 +160,70 @@ class TestSinglePoint(unittest.TestCase): expected_result = convert_unit(np.array(-0.65031699), "hartree") self.assertTrue(np.array_equal(result, expected_result)) - # def test_energy_total_scf_iteration(self): - # result = self.results["energy_total_scf_iteration"] - # expected_result = convert_unit(np.array(-32.2320848878), "hartree") - # self.assertTrue(np.array_equal(result[0], expected_result)) - - # def test_program_compilation_host(self): - # result = self.results["program_compilation_host"] - # self.assertEqual(result, "lenovo700") - - # def test_scf_max_iteration(self): - # result = self.results["scf_max_iteration"] - # self.assertEqual(result, 300) - - # def test_scf_threshold_energy_change(self): - # result = self.results["scf_threshold_energy_change"] - # self.assertEqual(result, 1.00E-07) - - # def test_number_of_spin_channels(self): - # result = self.results["number_of_spin_channels"] - # self.assertEqual(result, 1) - - # def test_electronic_structure_method(self): - # result = self.results["electronic_structure_method"] - # self.assertEqual(result, "DFT") - - # def test_energy_change_scf_iteration(self): - # energy_change = self.results["energy_change_scf_iteration"] - # expected_result = convert_unit(np.array(-3.22E+01), "hartree") - # self.assertTrue(np.array_equal(energy_change[0], expected_result)) - - # def test_energy_XC_scf_iteration(self): - # result = self.results["energy_XC_scf_iteration"] - # expected_result = convert_unit(np.array(-9.4555961214), "hartree") - # self.assertTrue(np.array_equal(result[0], expected_result)) - - - # def test_electronic_kinetic_energy(self): - # result = self.results["electronic_kinetic_energy"] - # expected_result = convert_unit(np.array(13.31525592466419), "hartree") - # self.assertTrue(np.array_equal(result, expected_result)) - - # def test_x_cp2k_filenames(self): - # input_filename = self.results["x_cp2k_input_filename"] - # expected_input = "si_bulk8.inp" - # self.assertTrue(input_filename, expected_input) - - # bs_filename = self.results["x_cp2k_basis_set_filename"] - # expected_bs = "../BASIS_SET" - # self.assertEqual(bs_filename, expected_bs) - - # geminal_filename = self.results["x_cp2k_geminal_filename"] - # expected_geminal = "BASIS_GEMINAL" - # self.assertEqual(geminal_filename, expected_geminal) - - # potential_filename = self.results["x_cp2k_potential_filename"] - # expected_potential = "../GTH_POTENTIALS" - # self.assertEqual(potential_filename, expected_potential) + def test_energy_total_scf_iteration(self): + result = self.results["energy_total_scf_iteration"] + # Test the first and last energies + expected_result = convert_unit(np.array( + [ + [-1.096898], + [-1.132460], + ]), "hartree") + self.assertTrue(np.array_equal(np.array([[result[0]], [result[-1]]]), expected_result)) + + def test_energy_change_scf_iteration(self): + result = self.results["energy_change_scf_iteration"] + expected_result = convert_unit(np.array( + [ + [0.000E+00], + [-8.606E-13], + ]), "hartree") + self.assertTrue(np.array_equal(np.array([[result[0]], [result[-1]]]), expected_result)) - # mm_potential_filename = self.results["x_cp2k_mm_potential_filename"] - # expected_mm_potential = "MM_POTENTIAL" - # self.assertEqual(mm_potential_filename, expected_mm_potential) + def test_scf_max_iteration(self): + result = self.results["scf_max_iteration"] + self.assertEqual(result, 10000) - # coordinate_filename = self.results["x_cp2k_coordinate_filename"] - # expected_coordinate = "__STD_INPUT__" - # self.assertEqual(coordinate_filename, expected_coordinate) + def test_scf_threshold_energy_change(self): + result = self.results["scf_threshold_energy_change"] + self.assertEqual(result, convert_unit(1.00E-07, "hartree")) - # def test_target_multiplicity(self): - # multiplicity = self.results["spin_target_multiplicity"] - # self.assertEqual(multiplicity, 1) + def test_electronic_structure_method(self): + result = self.results["electronic_structure_method"] + self.assertEqual(result, "DFT") + def test_scf_dft_number_of_iterations(self): + result = self.results["number_of_scf_iterations"] + self.assertEqual(result, 10) - # def test_section_basis_set_atom_centered(self): - # basis = self.results["section_basis_set_atom_centered"][0] - # name = basis["basis_set_atom_centered_short_name"][0] - # number = basis["basis_set_atom_number"][0] - # self.assertEquals(name, "DZVP-GTH-PADE") - # self.assertEquals(number, 14) + def test_section_method_atom_kind(self): + kind = self.results["section_method_atom_kind"][0] + self.assertEqual(kind["method_atom_kind_atom_number"][0], 1) + self.assertEqual(kind["method_atom_kind_label"][0], "H") - # def test_section_basis_set_cell_dependent(self): - # basis = self.results["section_basis_set_cell_dependent"][0] - # cutoff = basis["basis_set_planewave_cutoff"][0] - # self.assertEquals(cutoff, convert_unit(300.0, "hartree")) + def test_section_method_basis_set(self): + kind = self.results["section_method_basis_set"][0] + self.assertEqual(kind["method_basis_set_kind"][0], "wavefunction") + self.assertTrue(np.array_equal(kind["mapping_section_method_basis_set_cell_associated"][0], 0)) - # def test_section_method_atom_kind(self): - # kind = self.results["section_method_atom_kind"][0] - # self.assertEqual(kind["method_atom_kind_atom_number"][0], 14) - # self.assertEqual(kind["method_atom_kind_label"][0], "Si1") + def test_single_configuration_to_calculation_method_ref(self): + result = self.results["single_configuration_to_calculation_method_ref"] + self.assertEqual(result, 0) - # def test_section_method_basis_set(self): - # kind = self.results["section_method_basis_set"][0] - # self.assertEqual(kind["method_basis_set_kind"][0], "wavefunction") - # self.assertEqual(kind["number_of_basis_sets_atom_centered"][0], 1) - # self.assertTrue(np.array_equal(kind["mapping_section_method_basis_set_atom_centered"][0], np.array([[0,0]]))) + def test_single_configuration_calculation_to_system_description_ref(self): + result = self.results["single_configuration_calculation_to_system_ref"] + self.assertEqual(result, 0) # def test_single_configuration_calculation_converged(self): # result = self.results["single_configuration_calculation_converged"] # self.assertTrue(result) - # def test_scf_dft_number_of_iterations(self): - # result = self.results["number_of_scf_iterations"] - # self.assertEqual(result, 10) - - # def test_single_configuration_to_calculation_method_ref(self): - # result = self.results["single_configuration_to_calculation_method_ref"] - # self.assertEqual(result, 0) + # def test_number_of_spin_channels(self): + # result = self.results["number_of_spin_channels"] + # self.assertEqual(result, 1) - # def test_single_configuration_calculation_to_system_description_ref(self): - # result = self.results["single_configuration_calculation_to_system_ref"] - # self.assertEqual(result, 0) + # def test_target_multiplicity(self): + # multiplicity = self.results["spin_target_multiplicity"] + # self.assertEqual(multiplicity, 1) # def test_stress_tensor(self): # result = self.results["stress_tensor"] @@ -307,7 +262,6 @@ class TestSinglePoint(unittest.TestCase): # self.assertTrue(np.array_equal(result, expected_result)) - # #=============================================================================== # class TestErrors(unittest.TestCase): # """Test misc. error stuations which may occur during the parsing. -- GitLab