diff --git a/dependencies/nomad-meta-info b/dependencies/nomad-meta-info index c3bb8b69771a3cc87ca500f80a182380175e09cf..6cc658ff3117dfccad9aace953d225385e59c5cf 160000 --- a/dependencies/nomad-meta-info +++ b/dependencies/nomad-meta-info @@ -1 +1 @@ -Subproject commit c3bb8b69771a3cc87ca500f80a182380175e09cf +Subproject commit 6cc658ff3117dfccad9aace953d225385e59c5cf diff --git a/nomad/config.py b/nomad/config.py index 6ab76ca2c953d0f8ec37f577ce0f11f644d17369..b25ed02b14ea15e6edda2567fc89f87af5dbea53 100644 --- a/nomad/config.py +++ b/nomad/config.py @@ -206,6 +206,7 @@ max_upload_size = 32 * (1024 ** 3) springer_db_relative_path = 'normalizing/data/SM_all08.db' springer_db_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), springer_db_relative_path) + def normalize_loglevel(value, default_level=logging.INFO): plain_value = value if plain_value is None: diff --git a/nomad/normalizing/system.py b/nomad/normalizing/system.py index 90df65c1b7b697e4062dfdbeff297d2ed1651263..aef180f44f2655b7b8d1dc1548bc74e5b89b156a 100644 --- a/nomad/normalizing/system.py +++ b/nomad/normalizing/system.py @@ -42,9 +42,10 @@ atom_label_re = re.compile('|'.join( springer_db_connection = None + def open_springer_database(): """ - Create a global connection to the Springer database in a way that + Create a global connection to the Springer database in a way that each worker opens the database just once. """ global springer_db_connection @@ -54,10 +55,11 @@ def open_springer_database(): if not os.path.exists(db_file): utils.get_logger(__name__).error('Springer database not found') return None - springer_db_connection = sqlite3.connect(db_file) + springer_db_connection = sqlite3.connect(db_file) return springer_db_connection + def normalized_atom_labels(atom_labels): """ Normalizes the given atom labels: they either are labels right away, or contain @@ -68,26 +70,26 @@ def normalized_atom_labels(atom_labels): ase.data.chemical_symbols[0] if match is None else match.group(0) for match in [re.search(atom_label_re, atom_label) for atom_label in atom_labels]] + def formula_normalizer(atoms): """ Reads the chemical symbols in ase.atoms and returns a normalized formula. Formula normalization is on the basis of atom counting, - e.g., Tc -> Tc100, SZn -> S50Zn50, Co2Nb -> Co67Nb33 + e.g., Tc -> Tc100, SZn -> S50Zn50, Co2Nb -> Co67Nb33 """ - # + # chem_symb = atoms.get_chemical_symbols() - - atoms_counter = Counter(chem_symb) # dictionary + atoms_counter = Counter(chem_symb) # dictionary atoms_total = sum(atoms_counter.values()) atoms_normed = [] for key in atoms_counter.keys(): - norm = str(round(100 * atoms_counter[key] / atoms_total)) - atoms_normed.append( key + norm) + norm = str(round(100 * atoms_counter[key] / atoms_total)) + atoms_normed.append(key + norm) # atoms_normed.sort() return ''.join(atoms_normed) - + class SystemNormalizer(SystemBasedNormalizer): @@ -170,7 +172,7 @@ class SystemNormalizer(SystemBasedNormalizer): if atom_species is None: atom_species = atoms.get_atomic_numbers().tolist() self._backend.addArrayValues('atom_species', atom_species) - else: + else: if not isinstance(atom_species, list): atom_species = [atom_species] if atom_species != atoms.get_atomic_numbers().tolist(): @@ -385,31 +387,31 @@ class SystemNormalizer(SystemBasedNormalizer): self._backend.addArrayValues('equivalent_atoms_original', orig_equivalent_atoms) self._backend.closeSection('section_original_system', orig_gid) self._backend.closeSection('section_symmetry', symmetry_gid) - - self.springer_classification(atoms, space_group_number) # Springer Normalizer + + self.springer_classification(atoms, space_group_number) # Springer Normalizer self.prototypes(prim_num, prim_wyckoff, space_group_number) self._backend.closeSection('section_symmetry', symmetry_gid) - + def springer_classification(self, atoms, space_group_number): # SPRINGER NORMALIZER - normalized_formula = formula_normalizer(atoms) - # + normalized_formula = formula_normalizer(atoms) + # springer_db_connection = open_springer_database() - if springer_db_connection is None: + if springer_db_connection is None: return - cur = springer_db_connection.cursor() + cur = springer_db_connection.cursor() - # SQL QUERY + # SQL QUERY # (this replaces the four queries done in the old 'classify4me_SM_normalizer.py') - cur.execute(""" + cur.execute(""" SELECT entry.entry_id, entry.alphabetic_formula, GROUP_CONCAT(DISTINCT compound_classes.compound_class_name), - GROUP_CONCAT(DISTINCT classification.classification_name) + GROUP_CONCAT(DISTINCT classification.classification_name) FROM entry LEFT JOIN entry_compound_class as ecc ON ecc.entry_nr = entry.entry_nr LEFT JOIN compound_classes ON ecc.compound_class_nr = compound_classes.compound_class_nr @@ -422,57 +424,57 @@ class SystemNormalizer(SystemBasedNormalizer): """ % (normalized_formula, space_group_number)) results = cur.fetchall() # All SQL queries done - - + dbdict = {} - for ituple in results: + for ituple in results: for item in ituple: # 'spr' means 'springer' spr_id = ituple[0] - spr_aformula = ituple[1] # alphabetical formula + spr_aformula = ituple[1] # alphabetical formula spr_url = 'http://materials.springer.com/isp/crystallographic/docs/' + spr_id spr_compound = ituple[2].split(',') # convert string to list spr_classification = ituple[3].split(',') - # - + # + spr_compound.sort() spr_classification.sort() - dbdict[spr_id] = {'spr_id': spr_id, - 'spr_aformula': spr_aformula, - 'spr_url': spr_url, - 'spr_compound': spr_compound, - 'spr_classification': spr_classification } - + dbdict[spr_id] = {'spr_id': spr_id, + 'spr_aformula': spr_aformula, + 'spr_url': spr_url, + 'spr_compound': spr_compound, + 'spr_classification': spr_classification} - # SPRINGER's METAINFO UPDATE - # LAYOUT: Five sections under 'section_springer_material' for each material ID: + # SPRINGER's METAINFO UPDATE + # LAYOUT: Five sections under 'section_springer_material' for each material ID: # id, alphabetical formula, url, compound_class, clasification. # As per Markus/Luca's emails, we don't expose Springer bib references (Springer's paywall) - for material in dbdict.values(): - self._backend.openNonOverlappingSection('section_springer_material') + for material in dbdict.values(): + self._backend.openNonOverlappingSection('section_springer_material') self._backend.addValue('springer_id', material['spr_id']) - self._backend.addValue('springer_alphabetical_formula', material['spr_aformula']) + self._backend.addValue('springer_alphabetical_formula', material['spr_aformula']) self._backend.addValue('springer_url', material['spr_url']) self._backend.addArrayValues('springer_compound_class', material['spr_compound']) self._backend.addArrayValues('springer_classification', material['spr_classification']) - - self._backend.closeNonOverlappingSection('section_springer_material') + self._backend.closeNonOverlappingSection('section_springer_material') - # CHECK if the springer_classification and springer_compound_class found for each springer_id match + # Check the 'springer_classification' and 'springer_compound_class' information + # found is the same for all springer_id's dkeys = list(dbdict.keys()) - class_0 = dbdict[dkeys[0]]['spr_classification'] - comp_0 = dbdict[spr_id]['spr_compound'] - - for ii in range(1, len(dkeys)): - class_test = class_0 == dbdict[dkeys[ii]]['spr_classification'] - comp_test = comp_0 == dbdict[dkeys[ii]]['spr_compound'] - - if (class_test or comp_test) is False: - self.logger.warning('Mismatch in Springer classification or compounds') - + if len(dkeys) != 0: + class_0 = dbdict[dkeys[0]]['spr_classification'] + comp_0 = dbdict[spr_id]['spr_compound'] + + # compare 'class_0' and 'comp_0' against the rest + for ii in range(1, len(dkeys)): + class_test = (class_0 == dbdict[dkeys[ii]]['spr_classification']) + comp_test = (comp_0 == dbdict[dkeys[ii]]['spr_compound']) + + if (class_test or comp_test) is False: + self.logger.warning('Mismatch in Springer classification or compounds') + def prototypes(self, atomSpecies, wyckoffs, spg_nr): try: norm_wyckoff = SystemNormalizer.get_normalized_wyckoff(atomSpecies, wyckoffs) diff --git a/tests/test_normalizing.py b/tests/test_normalizing.py index fb4aae49fd33f1fbaeb1530363b676c17021ff17..f9cf12c5a974ff6aa1449f698c8e4d2681506eb5 100644 --- a/tests/test_normalizing.py +++ b/tests/test_normalizing.py @@ -205,31 +205,31 @@ def test_vasp_incar_system(): """ backend = parse_file(vasp_parser) backend = run_normalize(backend) - expected_value = 'SrTiO3' # material's formula in vasp.xml + expected_value = 'SrTiO3' # material's formula in vasp.xml + + # backend_value = backend.get_value('x_vasp_unknown_incars') # OK + # backend_value = backend.get_value('x_vasp_atom_kind_refs') # OK + backend_value = backend.get_value('x_vasp_incar_SYSTEM') # OK - #backend_value = backend.get_value('x_vasp_unknown_incars') # OK - #backend_value = backend.get_value('x_vasp_atom_kind_refs') # OK - backend_value = backend.get_value('x_vasp_incar_SYSTEM') # OK - print("backend_value: ", backend_value) assert expected_value == backend_value def test_springer_normalizer(): """ - Ensure the Springer normalizer works well with the VASP example. + Ensure the Springer normalizer works well with the VASP example. """ backend = parse_file(vasp_parser) backend = run_normalize(backend) - backend_value = backend.get_value('springer_url', 89) - # with get_value('springer_id') fails. - expected_value = 'http://materials.springer.com/isp/crystallographic/docs/sd_1932539' - print("backend_value: ", backend_value) - assert expected_value == backend_value + backend_value = backend.get_value('springer_id', 89) + expected_value = 'sd_1932539' + assert expected_value == backend_value - # FIXME: search for ID, - # also check NON empty for the others - # avois storing single use variables + backend_value = backend.get_value('springer_alphabetical_formula', 89) + expected_value = 'O3SrTi' + assert expected_value == backend_value - # TODO: add test fo rptototypes \ No newline at end of file + backend_value = backend.get_value('springer_url', 89) + expected_value = 'http://materials.springer.com/isp/crystallographic/docs/sd_1932539' + assert expected_value == backend_value