From e1d57e900204854df5ccd15e807b8e0683939f05 Mon Sep 17 00:00:00 2001
From: Lauri Himanen <lauri.himanen@gmail.com>
Date: Thu, 23 Apr 2020 13:33:54 +0300
Subject: [PATCH] Added initial special point resolution to band structure
 normalizer.

---
 nomad/atomutils.py                  |  23 +-
 nomad/config.py                     |   2 +-
 nomad/normalizing/band_structure.py | 338 +++++++++++++++-------------
 3 files changed, 206 insertions(+), 157 deletions(-)

diff --git a/nomad/atomutils.py b/nomad/atomutils.py
index fd579d9d68..6c811cd3ad 100644
--- a/nomad/atomutils.py
+++ b/nomad/atomutils.py
@@ -17,7 +17,7 @@ import fractions
 import itertools
 from math import gcd as gcd
 from functools import reduce
-from typing import List, Dict, Tuple, Any
+from typing import List, Dict, Tuple, Any, Union
 
 import numpy as np
 from scipy.spatial import Voronoi  # pylint: disable=no-name-in-module
@@ -41,6 +41,27 @@ def get_summed_atomic_mass(atomic_numbers: np.ndarray) -> float:
     return mass
 
 
+def find_match(self, pos: np.array, positions: np.array, eps: float) -> Union[int, None]:
+    """Attempts to find a position within a larger list of positions.
+
+    Args:
+        pos: The point to search for
+        positions: The points within which the search is performed.
+        eps: Match tolerance.
+
+    Returns:
+        Index of the matched position or None if match not found.
+    """
+    displacements = positions - pos
+    distances = np.linalg.norm(displacements, axis=1)
+    min_arg = np.argmin(distances)
+    min_value = distances[min_arg]
+    if min_value <= eps:
+        return min_arg
+    else:
+        return None
+
+
 def get_symmetry_string(space_group: int, wyckoff_sets: List[WyckoffSet]) -> str:
     """Used to serialize symmetry information into a string. The Wyckoff
     positions are assumed to be normalized and ordered as is the case if using
diff --git a/nomad/config.py b/nomad/config.py
index da0db44a86..fe2216434a 100644
--- a/nomad/config.py
+++ b/nomad/config.py
@@ -249,7 +249,7 @@ normalize = NomadConfig(
     k_space_precision=150e6,
     # The energy threshold for how much a band can be on top or below the fermi
     # level in order to detect a gap. k_B x T at room temperature. Unit: Joule
-    fermi_level_precision=300 * 1.38064852E-23,
+    band_structure_energy_tolerance=300 * 1.38064852E-23,
     springer_db_path=os.path.join(
         os.path.dirname(os.path.abspath(__file__)),
         'normalizing/data/springer.msg'
diff --git a/nomad/normalizing/band_structure.py b/nomad/normalizing/band_structure.py
index 120ea1783a..4cdf6d8d5d 100644
--- a/nomad/normalizing/band_structure.py
+++ b/nomad/normalizing/band_structure.py
@@ -13,11 +13,14 @@
 # limitations under the License.
 
 import ase
+import json
 import numpy as np
+from typing import List
 
-from nomad.datamodel.metainfo.public import section_k_band
+from nomad.datamodel.metainfo.public import section_k_band, section_single_configuration_calculation, section_band_gap, section_system
 from nomad.normalizing.normalizer import Normalizer
 from nomad.constants import pi
+from nomad import config, atomutils
 
 
 class BandStructureNormalizer(Normalizer):
@@ -41,23 +44,139 @@ class BandStructureNormalizer(Normalizer):
 
         # Loop through the bands
         for scc in self.section_run.section_single_configuration_calculation:
+
+            # In order to resolve band gaps, we need a reference to the highest
+            # occupied energy.
+            valence_band_maximum = scc.energy_reference_highest_occupied
+
+            # In order to resolve the special points and the reciprocal cell,
+            # we need informatoin about the system.
+            system = scc.section_single_configuration_calculation_to_system_ref
+
             for band in scc.section_k_band:
-                self.add_band_gaps(band)
-                self.add_path_labels(band)
+                self.add_reciprocal_cell(band, system)
+                self.add_brillouin_zone(band)
+                self.add_band_gaps(band, valence_band_maximum)
+                self.add_path_labels(band, system)
                 self.add_is_standard(band)
 
-    def add_band_gaps(self, band: section_k_band) -> None:
+    def add_reciprocal_cell(self, band: section_k_band, system: section_system):
+        """A reciprocal cell for this calculation. If the original unit cell is
+        not a primitive one, then we will use the one given by spglib.
+
+        If the used code is calculating it's own primitive cell, then the
+        reciprocal cell used in e.g. band structure calculation might differ
+        from the one given by spglib. To overcome this we would need to get the
+        exact reciprocal cell used by the calculation or then test for
+        different primitive cells
+        (https://atztogo.github.io/spglib/definition.html#transformation-to-the-primitive-cell)
+        whether the k-point path stays inside the first Brillouin zone.
+        """
+        try:
+            orig_atoms = system.m_cache["representative_atoms"]
+            symmetry_analyzer = system.section_symmetry[0].m_cache["symmetry_analyzer"]
+            prim_atoms = symmetry_analyzer.get_primitive_system()
+        except Exception:
+            self.logger.info("Could not resolve reciprocal cell.")
+            return
+
+        primitive_cell = prim_atoms.get_cell()
+        source_cell = orig_atoms.get_cell()
+
+        volume_primitive = primitive_cell.volume
+        volume_source = source_cell.volume
+        volume_diff = abs(volume_primitive - volume_source)
+
+        if volume_diff > (0.001)**3:
+            recip_cell = primitive_cell.reciprocal() * 1e10
+        else:
+            recip_cell = source_cell.reciprocal() * 1e10
+
+        band.reciprocal_cell = recip_cell
+
+    def add_brillouin_zone(self, band: section_k_band) -> None:
+        """Adds a dictionary containing the information needed to display
+        the Brillouin zone for this material. This functionality could be put
+        into the GUI directly, with the Brillouin zone construction performed
+        from the reciprocal cell.
+
+        The Brillouin Zone is a Wigner-Seitz cell, and is thus uniquely
+        defined. It's shape does not depend on the used primitive cell.
+        """
+        recip_cell = band.reciprocal_cell
+        if recip_cell is None:
+            self.logger.info("Could not resolve Brillouin zone as reciprocal cell is missing.")
+            return
+
+        brillouin_zone = atomutils.get_brillouin_zone(recip_cell.magnitude)
+        bz_json = json.dumps(brillouin_zone)
+        band.brillouin_zone = bz_json
+
+    def get_k_space_distance(self, reciprocal_cell: np.array, point1: np.array, point2: np.array) -> float:
+        """Used to calculate the Euclidean distance of two points in k-space,
+        given relative positions in the reciprocal cell.
+
+        Args:
+            reciprocal_cell: Reciprocal cell.
+            point1: The first position in k-space.
+            point2: The second position in k-space.
+
+        Returns:
+            float: Euclidian distance of the two points in k-space in SI units.
+        """
+        k_point_displacement = np.dot(reciprocal_cell, point1 - point2)
+        k_point_distance = np.linalg.norm(k_point_displacement)
+
+        return k_point_distance
+
+    def add_band_gaps(self, band: section_k_band, valence_band_maximum: np.array) -> None:
         """Given the band structure and fermi level, calculates the band gap
         for spin channels and also reports the total band gap as the minum gap
         found.
         """
+        if valence_band_maximum is None:
+            self.logger.info("Could not resolve band gaps as the energy reference is missing.")
+            return
+
+        reciprocal_cell = band.reciprocal_cell
+        if reciprocal_cell is None:
+            self.logger.info("Could not resolve band gaps as reciprocal cell is missing.")
+            return
+
+        # Gather the energies and k points from each segment into one big
+        # array
+        reciprocal_cell = reciprocal_cell.magnitude
+        valence_band_maximum = valence_band_maximum.magnitude
+        path: np.array = []
+        energies: np.array = []
+        for segment in band.section_k_band_segment:
+            try:
+                seg_k_points = segment.band_k_points
+                seg_energies = segment.band_energies
+            except Exception:
+                return
+            if seg_k_points is None or seg_energies is None:
+                self.logger.info("Could not resolve band gaps as energies or k points are missing.")
+                return
+            else:
+                seg_energies = seg_energies.magnitude
+
+            seg_energies = np.swapaxes(seg_energies, 1, 2)
+            path.append(seg_k_points)
+            energies.append(seg_energies)
+
+        path = np.concatenate(path, axis=0)
+        energies = np.concatenate(energies, axis=2)
+
         # Handle spin channels separately to find gaps for spin up and down
-        reciprocal_cell = band_structure.reciprocal_cell.magnitude
-        fermi_level = band_structure.fermi_level
-        n_channels = energies.shape[0]
+        n_channels = energies.shape[0]  # pylint: disable=E1136  # pylint/issues/3139
+        gaps: List[section_band_gap] = [None, None]
 
-        gaps: List[BandGap] = [None, None]
         for channel in range(n_channels):
+            if n_channels == 1:
+                vbm = valence_band_maximum
+            else:
+                vbm = valence_band_maximum[channel]
             channel_energies = energies[channel, :, :]
             lower_defined = False
             upper_defined = False
@@ -69,8 +188,8 @@ class BandStructureNormalizer(Normalizer):
             band_maxima = channel_energies[band_indices, band_maxima_idx]
 
             # Add a tolerance to minima and maxima
-            band_minima_tol = band_minima + config.normalize.fermi_level_precision
-            band_maxima_tol = band_maxima - config.normalize.fermi_level_precision
+            band_minima_tol = band_minima + config.normalize.band_structure_energy_tolerance
+            band_maxima_tol = band_maxima - config.normalize.band_structure_energy_tolerance
 
             for band_idx in range(num_bands):
                 band_min = band_minima[band_idx]
@@ -80,17 +199,17 @@ class BandStructureNormalizer(Normalizer):
 
                 # If any of the bands band crosses the Fermi level, there is no
                 # band gap
-                if band_min_tol <= fermi_level and band_max_tol >= fermi_level:
+                if band_min_tol <= vbm and band_max_tol >= vbm:
                     break
                 # Whole band below Fermi level, save the current highest
                 # occupied band point
-                elif band_min_tol <= fermi_level and band_max_tol <= fermi_level:
+                elif band_min_tol <= vbm and band_max_tol <= vbm:
                     gap_lower_energy = band_max
                     gap_lower_idx = band_maxima_idx[band_idx]
                     lower_defined = True
                 # Whole band above Fermi level, save the current lowest
                 # unoccupied band point
-                elif band_min_tol >= fermi_level:
+                elif band_min_tol >= vbm:
                     gap_upper_energy = band_min
                     gap_upper_idx = band_minima_idx[band_idx]
                     upper_defined = True
@@ -108,7 +227,7 @@ class BandStructureNormalizer(Normalizer):
                 k_point_distance = self.get_k_space_distance(reciprocal_cell, k_point_lower, k_point_upper)
                 is_direct_gap = k_point_distance <= config.normalize.k_space_precision
 
-                gap = BandGap()
+                gap = section_band_gap()
                 gap.type = "direct" if is_direct_gap else "indirect"
                 gap.value = float(gap_upper_energy - gap_lower_energy)
                 gap.conduction_band_min_k_point = k_point_upper
@@ -120,41 +239,75 @@ class BandStructureNormalizer(Normalizer):
         # For unpolarized calculations we simply report the gap if it is found.
         if n_channels == 1:
             if gaps[0] is not None:
-                band_structure.m_add_sub_section(ElectronicBandStructure.band_gap, gaps[0])
+                band.m_add_sub_section(section_k_band.section_band_gap, gaps[0])
         # For polarized calculations we report the gap separately for both
         # channels. Also we report the smaller gap as the the total gap for the
         # calculation.
         elif n_channels == 2:
             if gaps[0] is not None:
-                band_structure.m_add_sub_section(ElectronicBandStructure.band_gap_spin_up, gaps[0])
+                band.m_add_sub_section(section_k_band.section_band_gap_spin_up, gaps[0])
             if gaps[1] is not None:
-                band_structure.m_add_sub_section(ElectronicBandStructure.band_gap_spin_down, gaps[1])
+                band.m_add_sub_section(section_k_band.section_band_gap_spin_down, gaps[1])
             if gaps[0] is not None and gaps[1] is not None:
                 if gaps[0].value <= gaps[1].value:
-                    band_structure.m_add_sub_section(ElectronicBandStructure.band_gap, gaps[0])
+                    band.m_add_sub_section(section_k_band.section_band_gap, gaps[0])
                 else:
-                    band_structure.m_add_sub_section(ElectronicBandStructure.band_gap, gaps[1])
+                    band.m_add_sub_section(section_k_band.section_band_gap, gaps[1])
             else:
                 if gaps[0] is not None:
-                    band_structure.m_add_sub_section(ElectronicBandStructure.band_gap, gaps[0])
+                    band.m_add_sub_section(section_k_band.section_band_gap, gaps[0])
                 elif gaps[1] is not None:
-                    band_structure.m_add_sub_section(ElectronicBandStructure.band_gap, gaps[1])
+                    band.m_add_sub_section(section_k_band.section_band_gap, gaps[1])
 
+    def add_path_labels(self, band: section_k_band, system: section_system) -> None:
+        """Adds special high symmmetry point labels to the band path.
+        """
+        try:
+            cell = system.lattice_vectors.magnitude
+        except Exception:
+            self.logger.info("Could not resolve path labels as lattice vectors are missing.")
+            return
 
-    def add_path_labels(self, band: section_k_band) -> None:
-        pass
+        # Find special points for this lattice
+        special_points = ase.dft.kpoints.get_special_points(cell)
+        special_point_labels = list(special_points.keys())
+
+        # Form a conctiguous array of k points for faster operations
+        special_k_points = np.empty((len(special_points), 3))
+        for i, kpt in enumerate(special_points.values()):
+            special_k_points[i, :] = kpt
+
+        # Determine match tolerance in 1/m
+        eps = config.normalize.k_space_precision
+
+        # Try to find matches for the special points. We only attempt to match
+        # points at the start and end of a segment.
+        for segment in band.section_k_band_segment:
+            start_point = segment.band_k_points[0]
+            end_point = segment.band_k_points[-1]
+            start_index = atomutils.find_match(start_point, special_k_points, eps)
+            end_index = atomutils.find_match(end_point, special_points, eps)
+            if start_index is None:
+                start_label = None
+            else:
+                start_label = special_point_labels[start_index]
+            if end_index is None:
+                end_label = None
+            else:
+                end_label = special_point_labels[end_index]
+            segment.band_path_labels = [start_label, end_label]
 
     def add_is_standard(self, band: section_k_band) -> None:
         pass
 
-    def crystal_structure_from_cell(self, cell, eps=1e-4):
+    def crystal_structure_from_cell(self, cell: ase.cell.Cell, eps=1e-4):
         """Return the crystal structure as a string calculated from the cell.
         """
         cellpar = ase.geometry.cell_to_cellpar(cell=cell)
         abc = cellpar[:3]
         angles = cellpar[3:] / 180 * pi
         a, b, c = abc
-        alpha, beta, gamma = angles
+        alpha, _, gamma = angles
 
         # According to:
         # https://www.physics-in-a-nutshell.com/article/6/symmetry-crystal-systems-and-bravais-lattices#the-seven-crystal-systems
@@ -171,17 +324,14 @@ class BandStructureNormalizer(Normalizer):
         elif abs(angles - pi / 2).max() < eps:
             return 'orthorhombic'
         # if a = b != c , alpha = beta = 90deg, gamma = 120deg, hexagonal
-        elif (abs(a - b) < eps
-            and abs(gamma - pi / 3 * 2) < eps
-            and abs(angles[:2] - pi / 2).max() < eps):
+        elif (abs(a - b) < eps and abs(gamma - pi / 3 * 2) < eps and abs(angles[:2] - pi / 2).max() < eps):
             return 'hexagonal'
-        elif (c >= a and c >= b and alpha < pi / 2 and
-            abs(angles[1:] - pi / 2).max() < eps):
+        elif (c >= a and c >= b and alpha < pi / 2 and abs(angles[1:] - pi / 2).max() < eps):
             return 'monoclinic'
         else:
             raise ValueError('Cannot find crystal structure')
 
-    def get_special_points(cell, eps=1e-4):
+    def get_special_points(self, cell, eps=1e-4):
         """Return dict of special points.
 
         The definitions are from a paper by Wahyu Setyawana and Stefano
@@ -197,14 +347,12 @@ class BandStructureNormalizer(Normalizer):
         eps: float
             Tolerance for cell-check.
         """
-
-        lattice = crystal_structure_from_cell(cell)
-
+        lattice = self.crystal_structure_from_cell(cell)
         cellpar = ase.geometry.cell_to_cellpar(cell=cell)
         abc = cellpar[:3]
         angles = cellpar[3:] / 180 * pi
         a, b, c = abc
-        alpha, beta, gamma = angles
+        alpha, _, gamma = angles
 
         # Check that the unit-cells are as in the Setyawana-Curtarolo paper:
         if lattice == 'cubic':
@@ -247,123 +395,3 @@ class BandStructureNormalizer(Normalizer):
                 'Y': [0, 0, 1 / 2],
                 'Y1': [0, 0, -1 / 2],
                 'Z': [1 / 2, 0, 0]}
-
-specialPoints = {}
-try:
-    specialPoints = get_special_points(
-        convert_unit_function("m", "angstrom")(self.cell))
-except Exception as e:
-    self.logger.warn("failed to get special points/xtal structure", exc_info=e)
-
-
-    # Loop over bands
-    for band_data in bands:
-
-        # Add reciprocal cell and brillouin zone
-        self.get_reciprocal_cell(band_data, prim_atoms, orig_atoms)
-        self.get_brillouin_zone(band_data)
-
-        # If the parser has reported a valence band maximum, we can add
-        # band gap information.
-        if valence_band_maximum is not None:
-            kpoints = []
-            energies = []
-            segments = band_data.section_k_band_segment
-            if not segments:
-                return
-
-            # Loop over segments
-            for segment_src in segments:
-                try:
-                    seg_k_points = segment_src.band_k_points
-                    seg_energies = segment_src.band_energies
-                except Exception:
-                    return
-                if seg_k_points is None or seg_energies is None:
-                    return
-                else:
-                    seg_energies = seg_energies.magnitude
-
-                seg_energies = np.swapaxes(seg_energies, 1, 2)
-                kpoints.append(seg_k_points)
-                energies.append(seg_energies)
-
-            # Continue to calculate band gaps and other properties.
-            kpoints = np.concatenate(kpoints, axis=0)
-            energies = np.concatenate(energies, axis=2)
-
-            # If the parser has reported a valence band maximum, we can add
-            # band gap information.
-            self.get_band_gaps(band_data, energies, kpoints, valence_band_maximum)
-
-
-valence_band_maximum = representative_scc.energy_reference_highest_occupied
-if valence_band_maximum is not None:
-    valence_band_maximum = valence_band_maximum.magnitude
-
-
-    def get_reciprocal_cell(self, band_structure: ElectronicBandStructure, prim_atoms: Atoms, orig_atoms: Atoms):
-        """A reciprocal cell for this calculation. If the original unit cell is
-        not a primitive one, then we will use the one given by spglib.
-
-        If the used code is calculating it's own primitive cell, then the
-        reciprocal cell used in e.g. band structure calculation might differ
-        from the one given by spglib. To overcome this we would need to get the
-        exact reciprocal cell used by the calculation or then test for
-        different primitive cells
-        (https://atztogo.github.io/spglib/definition.html#transformation-to-the-primitive-cell)
-        whether the k-point path stays inside the first Brillouin zone.
-
-        Args:
-            prim_atoms: The primitive system as detected by MatID.
-            orig_atoms: The original simulation cell.
-
-        Returns:
-            Reciprocal cell as 3x3 numpy array. Units are in SI (1/m).
-        """
-        primitive_cell = prim_atoms.get_cell()
-        source_cell = orig_atoms.get_cell()
-
-        volume_primitive = primitive_cell.volume
-        volume_source = source_cell.volume
-        volume_diff = abs(volume_primitive - volume_source)
-
-        if volume_diff > (0.001)**3:
-            recip_cell = primitive_cell.reciprocal() * 1e10
-        else:
-            recip_cell = source_cell.reciprocal() * 1e10
-
-        band_structure.reciprocal_cell = recip_cell
-
-
-    def get_k_space_distance(self, reciprocal_cell: np.array, point1: np.array, point2: np.array) -> float:
-        """Used to calculate the Euclidean distance of two points in k-space,
-        given relative positions in the reciprocal cell.
-
-        Args:
-            reciprocal_cell: Reciprocal cell.
-            point1: The first position in k-space.
-            point2: The second position in k-space.
-
-        Returns:
-            float: Euclidian distance of the two points in k-space in SI units.
-        """
-        k_point_displacement = np.dot(reciprocal_cell, point1 - point2)
-        k_point_distance = np.linalg.norm(k_point_displacement)
-
-        return k_point_distance
-
-    def get_brillouin_zone(self, band_structure: ElectronicBandStructure) -> None:
-        """Returns a dictionary containing the information needed to display
-        the Brillouin zone for this material. This functionality could be put
-        into the GUI directly, with the Brillouin zone construction performed
-        from the reciprocal cell.
-
-        The Brillouin Zone is a Wigner-Seitz cell, and is thus uniquely
-        defined. It's shape does not depend on the used primitive cell.
-        """
-        recip_cell = band_structure.reciprocal_cell.magnitude
-        brillouin_zone = atomutils.get_brillouin_zone(recip_cell)
-        bz_json = json.dumps(brillouin_zone)
-        band_structure.brillouin_zone = bz_json
-
-- 
GitLab