From 95f84f6e04d5617559e9cd5a43cd69b5f7701eab Mon Sep 17 00:00:00 2001
From: Ask Hjorth Larsen <asklarsen@gmail.com>
Date: Thu, 8 Sep 2016 20:47:02 +0200
Subject: [PATCH] work on gulp parser.  Space groups

---
 parser/parser-gulp/main.py        | 135 ++++++++++++++++++++++++++----
 parser/parser-gulp/spacegroups.py |  81 ++++++++++++++++++
 2 files changed, 202 insertions(+), 14 deletions(-)
 create mode 100644 parser/parser-gulp/spacegroups.py

diff --git a/parser/parser-gulp/main.py b/parser/parser-gulp/main.py
index 677a000..1dd7a4d 100644
--- a/parser/parser-gulp/main.py
+++ b/parser/parser-gulp/main.py
@@ -11,9 +11,11 @@ from nomadcore.local_meta_info import loadJsonFile, InfoKindEl
 from nomadcore.unit_conversion.unit_conversion \
     import register_userdefined_quantity, convert_unit
 
+# local module: spacegroups.py sits next to this file
+from spacegroups import get_spacegroup_number
 #from util import floating, integer
 
-metaInfoPath = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../../../../nomad-meta-info/meta_info/nomad_meta_info/siesta.nomadmetainfo.json"))
+metaInfoPath = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)),"../../../../nomad-meta-info/meta_info/nomad_meta_info/gulp.nomadmetainfo.json"))
 metaInfoEnv, warnings = loadJsonFile(filePath=metaInfoPath,
                                      dependencyLoader=None,
                                      extraArgsHandling=InfoKindEl.ADD_EXTRA_ARGS,
@@ -67,8 +69,26 @@ def ArraySM(header, row, build, **kwargs):
             **kwargs)
     return sm
 
-
-def get_array(metaname, dtype, istart=0, iend=None, unit=None):
+def get_frac_coords(backend, lines):
+    """Send symbols and 3-component positions parsed from lines to backend."""
+    symbols = []
+    positions = []
+    for row in lines:
+        # Stand-alone '*' tokens are dropped before the columns are indexed.
+        fields = [tok for tok in row.split() if tok != '*']
+        assert fields[2] == 'c'
+        xyz = list(map(float, fields[3:6]))
+        assert len(xyz) == 3
+        symbols.append(fields[1])
+        positions.append(xyz)
+    symbols = np.array(symbols)
+    positions = np.array(positions)
+    backend.addArrayValues('x_gulp_atomic_basis_symbols', symbols)
+    backend.addArrayValues('x_gulp_atomic_basis_positions', positions)
+
+
+def get_array(metaname, dtype=float, istart=0, iend=None, unit=None,
+              storage=None):
     @errprint
     def buildarray(backend, lines):
         arr = tokenize(lines)
@@ -79,35 +99,122 @@ def get_array(metaname, dtype, istart=0, iend=None, unit=None):
         arr = arr.astype(dtype)
         if unit is not None:
             arr = convert_unit(arr, unit)
-        backend.addArrayValues(metaname, arr)
+        if storage is not None:
+            storage[metaname] = arr
+        else:
+            backend.addArrayValues(metaname, arr)
     return buildarray
 
+#def hello(parser):
+#    print('hello')
+
+
+
+class GulpContext(object):
+    """Parser context: caches parsed arrays and reacts to section events."""
+
+    def __init__(self):
+        self.data = {}  # arrays captured via save_array(), keyed by name
+        # Asymmetric-unit data; filled in by adhoc_get_frac_coords().
+        self.frac_coords_asymm_unit = None
+        self.chem_symbols_asymm_unit = None
+
+    def startedParsing(self, fname, parser):
+        pass
+
+    def save_array(self, key, dtype=float, istart=0, iend=None,
+                   unit=None):
+        """Return a get_array() builder that stores into self.data[key]."""
+        return get_array(key, dtype=dtype, istart=istart, iend=iend, unit=unit,
+                         storage=self.data)
+
+    def onClose_section_system(self, backend, gindex, section):
+        """Expand the asymmetric unit to a full crystal with ASE."""
+        names = ['x_gulp_cell_a', 'x_gulp_cell_b', 'x_gulp_cell_c',
+                 'x_gulp_cell_alpha', 'x_gulp_cell_beta', 'x_gulp_cell_gamma']
+        cellpar = [section[name] for name in names]
+        spacegroup = section['x_gulp_space_group']
+        # Nothing to expand when symmetry data is missing (presumably
+        # section[...] yields None for absent values -- TODO confirm).
+        if (spacegroup is None or self.frac_coords_asymm_unit is None
+                or any(x is None for x in cellpar)):
+            return
+        for x in cellpar:
+            assert len(x) == 1
+        from ase.spacegroup import crystal
+        num = get_spacegroup_number(spacegroup[0])
+        # TODO: atoms is built but not yet written to the backend.
+        atoms = crystal(self.chem_symbols_asymm_unit,
+                        basis=self.frac_coords_asymm_unit,
+                        spacegroup=num,
+                        cellpar=[x[0] for x in cellpar])
+
+    def adhoc_get_frac_coords(self, backend, lines):
+        """Parse asymmetric-unit rows into symbol and position arrays."""
+        positions = []
+        symbols = []
+        for line in lines:
+            tokens = [t for t in line.split() if t != '*']
+            assert tokens[2] == 'c'
+            pos = [float(x) for x in tokens[3:6]]
+            assert len(pos) == 3
+            positions.append(pos)
+            symbols.append(tokens[1])
+        self.frac_coords_asymm_unit = np.array(positions)
+        self.chem_symbols_asymm_unit = np.array(symbols)
+
+context = GulpContext()
 
 infoFileDescription = SM(
     name='root',
     weak=True,
     startReStr='',
     fixedStartValues={'program_name': 'gulp'},
-    sections=['section_run'],
+    sections=['section_run', 'section_system'],
     subMatchers=[
         SM(r'\*\s*Version\s*=\s*(?P<program_version>\S+)',
            name='version'),
+        SM(r'\s*Symmetry\s*:',
+           name='symm-header',
+           subMatchers=[
+               SM(r'\s*Space group \S+\s+:\s*(?P<x_gulp_space_group>.+?)\s*$',
+                  name='spacegroup'),  # value is looked up via get_spacegroup_number()
+               SM(r'\s*Patterson group\s*:\s*(?P<x_gulp_patterson_group>.+?)\s*$',
+                  name='patterson'),
+           ]),
         SM(r'\s*Cartesian lattice vectors \(Angstroms\) :',
            name='lattice-header',
            subMatchers=[
                ArraySM(r'',
                        r'\s*\S+\s*\S+\s*\S+',
-                       get_array('simulation_cell', float,
-                                 unit='angstrom'))
-           ])
+                       context.save_array('cell'))  # cached in context.data['cell']; not written to backend, no unit given
+           ]),
+        SM(r'\s*Primitive cell parameters\s*:\s*Full cell parameters\s*:',
+           name='cellpar1',  # only the second (full-cell) column of each row is captured
+           subMatchers=[
+               SM(r'\s*a\s*=\s*\S+\s*alpha\s*=\s*\S+\s*a\s*=\s*(?P<x_gulp_cell_a>\S+)\s+alpha\s*=\s*(?P<x_gulp_cell_alpha>\S+)'),
+               SM(r'\s*b\s*=\s*\S+\s*beta\s*=\s*\S+\s*b =\s*(?P<x_gulp_cell_b>\S+)\s+beta\s*=\s*(?P<x_gulp_cell_beta>\S+)'),
+               SM(r'\s*c\s*=\s*\S+\s*gamma\s*=\s*\S+\s*c =\s*(?P<x_gulp_cell_c>\S+)\s+gamma\s*=\s*(?P<x_gulp_cell_gamma>\S+)'),
+           ]),
+        SM(r'\s*Cell parameters\s*\(Angstroms/Degrees\):',
+           name='cellpar2',
+           subMatchers=[
+               SM(r'\s*a =\s*(?P<x_gulp_cell_a>\S+)\s*alpha\s*=\s*(?P<x_gulp_cell_alpha>\S+)'),
+               SM(r'\s*b =\s*(?P<x_gulp_cell_b>\S+)\s*beta\s*=\s*(?P<x_gulp_cell_beta>\S+)'),
+               SM(r'\s*c =\s*(?P<x_gulp_cell_c>\S+)\s*gamma\s*=\s*(?P<x_gulp_cell_gamma>\S+)'),
+           ]),
+        SM(r'\s*Fractional coordinates of asymmetric unit\s*:',
+           subFlags=SM.SubFlags.Sequenced,
+           name='frac-coords',
+           subMatchers=[
+               SM(r'------------', name='bar'),
+               ArraySM(r'------------',
+                       r'\s*\d+\s+\S+\s+c\s+.*',
+                       context.adhoc_get_frac_coords),  # sets context.frac_coords_asymm_unit / chem_symbols_asymm_unit
+               SM(r'-------------')
+           ]),
     ])
 
-class SiestaContext(object):
-    def startedParsing(self, fname, parser):
-        pass
-
-context = SiestaContext()
-
 def main(**kwargs):
     mainFunction(mainFileDescription=infoFileDescription,
                  metaInfoEnv=metaInfoEnv,
diff --git a/parser/parser-gulp/spacegroups.py b/parser/parser-gulp/spacegroups.py
new file mode 100644
index 0000000..90e31c2
--- /dev/null
+++ b/parser/parser-gulp/spacegroups.py
@@ -0,0 +1,81 @@
+# All the space groups that GULP will print
+sg = ['P 1             ','P -1            ','P 2             ','P 21            ',
+      'C 2             ','P M             ','P C             ','C M             ','C C             ',
+      'P 2/M           ','P 21/M          ','C 2/M           ','P 2/C           ','P 21/C          ',
+      'C 2/C           ','P 2 2 2         ','P 2 2 21        ','P 21 21 2       ','P 21 21 21      ',
+      'C 2 2 21        ','C 2 2 2         ','F 2 2 2         ','I 2 2 2         ','I 21 21 21      ',
+      'P M M 2         ','P M C 21        ','P C C 2         ','P M A 2         ','P C A 21        ',
+      'P N C 2         ','P M N 21        ','P B A 2         ','P N A 21        ','P N N 2         ',
+      'C M M 2         ','C M C 21        ','C C C 2         ','A M M 2         ','A B M 2         ',
+      'A M A 2         ','A B A 2         ','F M M 2         ','F D D 2         ','I M M 2         ',
+      'I B A 2         ','I M A 2         ','P M M M         ','P N N N         ','P C C M         ',
+      'P B A N         ','P M M A         ','P N N A         ','P M N A         ','P C C A         ',
+      'P B A M         ','P C C N         ','P B C M         ','P N N M         ','P M M N         ',
+      'P B C N         ','P B C A         ','P N M A         ','C M C M         ',
+      'C M C A         ','C M M M         ','C C C M         ','C M M A         ','C C C A         ',
+      'F M M M         ','F D D D         ','I M M M         ','I B A M         ','I B C A         ',
+      'I M M A         ','P 4             ','P 41            ','P 42            ','P 43            ',
+      'I 4             ','I 41            ','P -4            ','I -4            ','P 4/M           ',
+      'P 42/M          ','P 4/N           ','P 42/N          ','I 4/M           ','I 41/A          ',
+      'P 4 2 2         ','P 4 21 2        ','P 41 2 2        ','P 41 21 2       ','P 42 2 2        ',
+      'P 42 21 2       ','P 43 2 2        ','P 43 21 2       ','I 4 2 2         ','I 41 2 2        ',
+      'P 4 M M         ','P 4 B M         ','P 42 C M        ','P 42 N M        ','P 4 C C         ',
+      'P 4 N C         ','P 42 M C        ','P 42 B C        ','I 4 M M         ','I 4 C M         ',
+      'I 41 M D        ','I 41 C D        ','P -4 2 M        ','P -4 2 C        ','P -4 21 M       ',
+      'P -4 21 C       ','P -4 M 2        ','P -4 C 2        ','P -4 B 2        ','P -4 N 2        ',
+      'I -4 M 2        ','I -4 C 2        ','I -4 2 M        ','I -4 2 D        ',
+      'P 4/M M M       ','P 4/M C C       ','P 4/N B M       ','P 4/N N C       ','P 4/M B M       ',
+      'P 4/M N C       ','P 4/N M M       ','P 4/N C C       ','P 42/M M C      ','P 42/M C M      ',
+      'P 42/N B C      ','P 42/N N M      ','P 42/M B C      ','P 42/M N M      ','P 42/N M C      ',
+      'P 42/N C M      ','I 4/M M M       ','I 4/M C M       ','I 41/A M D      ','I 41/A C D      ',
+      'P 3             ','P 31            ','P 32            ','R 3             ','P -3            ',
+      'R -3            ','P 3 1 2         ','P 3 2 1         ','P 31 1 2        ','P 31 2 1        ',
+      'P 32 1 2        ','P 32 2 1        ','R 3 2           ','P 3 M 1         ','P 3 1 M         ',
+      'P 3 C 1         ','P 3 1 C         ','R 3 M           ','R 3 C           ','P -3 1 M        ',
+      'P -3 1 C        ','P -3 M 1        ','P -3 C 1        ','R -3 M          ','R -3 C          ',
+      'P 6             ','P 61            ','P 65            ','P 62            ','P 64            ',
+      'P 63            ','P -6            ','P 6/M           ','P 63/M          ','P 6 2 2         ',
+      'P 61 2 2        ','P 65 2 2        ','P 62 2 2        ','P 64 2 2        ',
+      'P 63 2 2        ','P 6 M M         ','P 6 C C         ','P 63 C M        ','P 63 M C        ',
+      'P -6 M 2        ','P -6 C 2        ','P -6 2 M        ','P -6 2 C        ','P 6/M M M       ',
+      'P 6/M C C       ','P 63/M C M      ','P 63/M M C      ','P 2 3           ','F 2 3           ',
+      'I 2 3           ','P 21 3          ','I 21 3          ','P M 3           ','P N 3           ',
+      'F M 3           ','F D 3           ','I M 3           ','P A 3           ','I A 3           ',
+      'P 4 3 2         ','P 42 3 2        ','F 4 3 2         ','F 41 3 2        ','I 4 3 2         ',
+      'P 43 3 2        ','P 41 3 2        ','I 41 3 2        ','P -4 3 M        ','F -4 3 M        ',
+      'I -4 3 M        ','P -4 3 N        ','F -4 3 C        ','I -4 3 D        ','P M 3 M         ',
+      'P N 3 N         ','P M 3 N         ','P N 3 M         ','F M 3 M         ','F M 3 C         ',
+      'F D 3 M         ','F D 3 C         ','I M 3 M         ','I A 3 D         ','C 1             ',
+      'C -1            ']
+sg = [string.strip() for string in sg]
+
+# GULP omits the minus sign of the threefold axis in some symbols (e.g.
+# "P M 3" for "P M -3"), so both spellings are registered for those numbers.
+sgdict = {}
+missing_minus = set(range(200, 207))
+missing_minus.update(range(221, 231))  # union() would discard its result
+
+for i, name in enumerate(sg):
+    num = i + 1
+    assert name not in sgdict
+    sgdict[name] = num
+    if num in missing_minus:
+        correct_name = name.replace('3', '-3')
+        assert correct_name not in sgdict
+        sgdict[correct_name] = num
+
+def get_spacegroup_number(name):
+    """Return the number registered for symbol *name* (KeyError if unknown)."""
+    return sgdict[name]
+
+#for i
+#print(len(sg))
+#from ase.spacegroup import Spacegroup
+#asesym = []
+#for i in range(1, 231):
+#    asesym.append(Spacegroup(i).symbol)
+
+#for i, (s1, s2) in enumerate(zip(asesym, sg)):
+#    is_ok = (s1.lower() == s2.lower())
+#    print('%3d %10s %10s %10s' % (i + 1, s1, s2, is_ok))
+
-- 
GitLab